diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index b14295ca15c..1ebc6408a60 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -135,9 +135,9 @@ source %{ (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || - // The vector implementation of Op_AddReductionVD/F is for the Vector API only. - // It is not suitable for auto-vectorization because it does not add the elements - // in the same order as sequential code, and FP addition is non-associative. + // The implementations of Op_AddReductionVD/F in Neon are for the Vector API only. + // They are not suitable for auto-vectorization because the result would not conform + // to the JLS, Section Evaluation Order. opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -2858,14 +2858,14 @@ instruct reduce_addL_sve(iRegLNoSp dst, iRegL isrc, vReg vsrc, vRegD tmp) %{ %} // reduction addF -// Floating-point addition is not associative, so the rules for AddReductionVF -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVF is only generated by Vector API. -instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 2); + +instruct reduce_non_strict_order_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for a 64-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 2 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_add2F_neon $dst, $fsrc, $vsrc" %} + format %{ "reduce_non_strict_order_add2F_neon $dst, $fsrc, $vsrc" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ S); __ fadds($dst$$FloatRegister, $dst$$FloatRegister, $fsrc$$FloatRegister); @@ -2873,11 +2873,13 @@ instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} -instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 4); +instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ + // Non-strictly ordered floating-point add reduction for 128-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 4 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "reduce_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} + format %{ "reduce_non_strict_order_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} ins_encode %{ __ faddp($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister, $vsrc$$FloatRegister); __ faddp($dst$$FloatRegister, $tmp$$FloatRegister, __ S); @@ -2886,11 +2888,21 @@ instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ ins_pipe(pipe_slow); %} +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionVF when vector size > 128-bits. 
For example - +// AddReductionVF generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionVF. For example - AddReductionVF generated by +// auto-vectorization on SVE machine. instruct reduce_addF_sve(vRegF dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionVF dst_src1 src2)); format %{ "reduce_addF_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ S, ptrue, $src2$$FloatRegister); @@ -2899,14 +2911,14 @@ instruct reduce_addF_sve(vRegF dst_src1, vReg src2) %{ %} // reduction addD -// Floating-point addition is not associative, so the rule for AddReductionVD -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVD is only generated by Vector API. -instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ - predicate(UseSVE == 0); + +instruct reduce_non_strict_order_add2D_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for doubles. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD dsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_addD_neon $dst, $dsrc, $vsrc\t# 2D" %} + format %{ "reduce_non_strict_order_add2D_neon $dst, $dsrc, $vsrc\t# 2D" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ D); __ faddd($dst$$FloatRegister, $dst$$FloatRegister, $dsrc$$FloatRegister); @@ -2914,11 +2926,21 @@ instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionVD when vector size > 128-bits. For example - +// AddReductionVD generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionVD. For example - AddReductionVD generated by +// auto-vectorization on SVE machine. 
instruct reduce_addD_sve(vRegD dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionVD dst_src1 src2)); format %{ "reduce_addD_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ D, ptrue, $src2$$FloatRegister); diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 060bb4a11d4..29f92772368 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -125,9 +125,9 @@ source %{ (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || - // The vector implementation of Op_AddReductionVD/F is for the Vector API only. - // It is not suitable for auto-vectorization because it does not add the elements - // in the same order as sequential code, and FP addition is non-associative. + // The implementations of Op_AddReductionVD/F in Neon are for the Vector API only. + // They are not suitable for auto-vectorization because the result would not conform + // to the JLS, Section Evaluation Order. opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -1752,14 +1752,14 @@ REDUCE_ADD_INT_NEON_SVE_PAIRWISE(I, iRegIorL2I) REDUCE_ADD_INT_NEON_SVE_PAIRWISE(L, iRegL) // reduction addF -// Floating-point addition is not associative, so the rules for AddReductionVF -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVF is only generated by Vector API. -instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 2); + +instruct reduce_non_strict_order_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for a 64-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 2 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_add2F_neon $dst, $fsrc, $vsrc" %} + format %{ "reduce_non_strict_order_add2F_neon $dst, $fsrc, $vsrc" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ S); __ fadds($dst$$FloatRegister, $dst$$FloatRegister, $fsrc$$FloatRegister); @@ -1767,11 +1767,13 @@ instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} -instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 4); +instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ + // Non-strictly ordered floating-point add reduction for 128-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). 
+ predicate(Matcher::vector_length(n->in(2)) == 4 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "reduce_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} + format %{ "reduce_non_strict_order_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} ins_encode %{ __ faddp($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister, $vsrc$$FloatRegister); __ faddp($dst$$FloatRegister, $tmp$$FloatRegister, __ S); @@ -1783,11 +1785,21 @@ dnl dnl REDUCE_ADD_FP_SVE($1, $2 ) dnl REDUCE_ADD_FP_SVE(type, size) define(`REDUCE_ADD_FP_SVE', ` +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionV$1 when vector size > 128-bits. For example - +// AddReductionV$1 generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionV$1. For example - AddReductionV$1 generated by +// auto-vectorization on SVE machine. instruct reduce_add$1_sve(vReg$1 dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionV$1 dst_src1 src2)); format %{ "reduce_add$1_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ $2, ptrue, $src2$$FloatRegister); @@ -1798,14 +1810,14 @@ dnl REDUCE_ADD_FP_SVE(F, S) // reduction addD -// Floating-point addition is not associative, so the rule for AddReductionVD -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVD is only generated by Vector API. -instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ - predicate(UseSVE == 0); + +instruct reduce_non_strict_order_add2D_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for doubles. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). 
+ predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD dsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_addD_neon $dst, $dsrc, $vsrc\t# 2D" %} + format %{ "reduce_non_strict_order_add2D_neon $dst, $dsrc, $vsrc\t# 2D" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ D); __ faddd($dst$$FloatRegister, $dst$$FloatRegister, $dsrc$$FloatRegister); diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp index 84caef57f87..974214d985b 100644 --- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp @@ -168,6 +168,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 4 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } int displacement() const { return (int_at(displacement_offset) << 6) >> 4; } diff --git a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp index 15b57188730..e26c23cd983 100644 --- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp +++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp @@ -415,6 +415,7 @@ inline NativeJump* nativeJump_at(address address) { class NativeCall: public RawNativeCall { public: + static int byte_size() { return instruction_size; } // NativeCall::next_instruction_address() is used only to define the // range where to look for the relocation information. We need not // walk over composed instructions (as long as the relocation information diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp index 113cedfee7c..f21d76f8a67 100644 --- a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp +++ b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp @@ -137,6 +137,8 @@ class NativeCall: public NativeInstruction { instruction_size = 16 // Used in shared code for calls with reloc_info. }; + static int byte_size() { return instruction_size; } + static bool is_call_at(address a) { return Assembler::is_bl(*(int*)(a)); } diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index e9b3624d9d2..f925f8950aa 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -126,6 +126,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 4 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } address return_address() const { return addr_at(return_address_offset); } diff --git a/src/hotspot/cpu/s390/nativeInst_s390.hpp b/src/hotspot/cpu/s390/nativeInst_s390.hpp index 13f15224f8b..8003e1d42f2 100644 --- a/src/hotspot/cpu/s390/nativeInst_s390.hpp +++ b/src/hotspot/cpu/s390/nativeInst_s390.hpp @@ -212,6 +212,7 @@ class NativeCall: public NativeInstruction { call_far_pcrelative_displacement_alignment = 4 }; + static int byte_size() { return instruction_size; } // Maximum size (in bytes) of a call to an absolute address. 
// Used when emitting call to deopt handler blob, which is a diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp index f8cbf70f189..70cb6179366 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.hpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp @@ -160,6 +160,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 5 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } int displacement() const { return (jint) int_at(displacement_offset); } diff --git a/src/hotspot/cpu/zero/nativeInst_zero.hpp b/src/hotspot/cpu/zero/nativeInst_zero.hpp index 77a7d511ac5..2f3d9b80617 100644 --- a/src/hotspot/cpu/zero/nativeInst_zero.hpp +++ b/src/hotspot/cpu/zero/nativeInst_zero.hpp @@ -70,6 +70,8 @@ class NativeCall : public NativeInstruction { instruction_size = 0 // not used within the interpreter }; + static int byte_size() { return instruction_size; } + address instruction_address() const { ShouldNotCallThis(); return nullptr; diff --git a/src/hotspot/share/code/nmethod.inline.hpp b/src/hotspot/share/code/nmethod.inline.hpp index 4af4d3ffaed..49af1e0b95f 100644 --- a/src/hotspot/share/code/nmethod.inline.hpp +++ b/src/hotspot/share/code/nmethod.inline.hpp @@ -37,7 +37,7 @@ inline bool nmethod::is_deopt_pc(address pc) { return is_deopt_entry(pc) || is_d inline bool nmethod::is_deopt_entry(address pc) { return pc == deopt_handler_begin() #if INCLUDE_JVMCI - || (is_compiled_by_jvmci() && pc == (deopt_handler_begin() + NativeCall::instruction_size)) + || (is_compiled_by_jvmci() && pc == (deopt_handler_begin() + NativeCall::byte_size())) #endif ; } @@ -45,7 +45,7 @@ inline bool nmethod::is_deopt_entry(address pc) { inline bool nmethod::is_deopt_mh_entry(address pc) { return pc == deopt_mh_handler_begin() #if INCLUDE_JVMCI - || (is_compiled_by_jvmci() && pc == (deopt_mh_handler_begin() + NativeCall::instruction_size)) + || (is_compiled_by_jvmci() && pc == (deopt_mh_handler_begin() + NativeCall::byte_size())) #endif ; } diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp index ed594d72d06..49f1c82a98a 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp @@ -227,7 +227,7 @@ void G1CollectedHeap::register_region_with_region_attr(G1HeapRegion* r) { void G1CollectedHeap::register_old_region_with_region_attr(G1HeapRegion* r) { assert(!r->has_pinned_objects(), "must be"); assert(r->rem_set()->is_complete(), "must be"); - _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); + _region_attr.set_in_old(r->hrm_index(), true); _rem_set->exclude_region_from_scan(r->hrm_index()); } diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index ad603439e59..90ef4da4f1e 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1460,7 +1460,7 @@ class PhaseIdealLoop : public PhaseTransform { }; AutoVectorizeStatus auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared); - // Move UnorderedReduction out of loop if possible + // Move an unordered Reduction out of loop if possible void move_unordered_reduction_out_of_loop(IdealLoopTree* loop); // Create a scheduled list of nodes control dependent on ctrl set. diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp index a3227d47832..b0effb6d4f5 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -4310,11 +4310,19 @@ PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) { return AutoVectorizeStatus::Success; } +// Returns true if the Reduction node is unordered. +static bool is_unordered_reduction(Node* n) { + return n->is_Reduction() && !n->as_Reduction()->requires_strict_order(); +} + // Having ReductionNodes in the loop is expensive. They need to recursively // fold together the vector values, for every vectorized loop iteration. If // we encounter the following pattern, we can vector accumulate the values // inside the loop, and only have a single UnorderedReduction after the loop. // +// Note: UnorderedReduction represents a ReductionNode which does not require +// calculating in strict order. +// // CountedLoop init // | | // +------+ | +-----------------------+ @@ -4354,21 +4362,24 @@ PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) { // wise. This is a single operation per vector_accumulator, rather than many // for a UnorderedReduction. We can then reduce the last vector_accumulator // after the loop, and also reduce the init value into it. +// // We can not do this with all reductions. Some reductions do not allow the -// reordering of operations (for example float addition). +// reordering of operations (for example float addition/multiplication require +// strict order). void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { assert(!C->major_progress() && loop->is_counted() && loop->is_innermost(), "sanity"); - // Find all Phi nodes with UnorderedReduction on backedge. + // Find all Phi nodes with an unordered Reduction on backedge. CountedLoopNode* cl = loop->_head->as_CountedLoop(); for (DUIterator_Fast jmax, j = cl->fast_outs(jmax); j < jmax; j++) { Node* phi = cl->fast_out(j); - // We have a phi with a single use, and a UnorderedReduction on the backedge. - if (!phi->is_Phi() || phi->outcnt() != 1 || !phi->in(2)->is_UnorderedReduction()) { + // We have a phi with a single use, and an unordered Reduction on the backedge. + if (!phi->is_Phi() || phi->outcnt() != 1 || !is_unordered_reduction(phi->in(2))) { continue; } - UnorderedReductionNode* last_ur = phi->in(2)->as_UnorderedReduction(); + ReductionNode* last_ur = phi->in(2)->as_Reduction(); + assert(!last_ur->requires_strict_order(), "must be"); // Determine types const TypeVect* vec_t = last_ur->vect_type(); @@ -4385,14 +4396,14 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { continue; // not implemented -> fails } - // Traverse up the chain of UnorderedReductions, checking that it loops back to - // the phi. Check that all UnorderedReductions only have a single use, except for + // Traverse up the chain of unordered Reductions, checking that it loops back to + // the phi. 
Check that all unordered Reductions only have a single use, except for // the last (last_ur), which only has phi as a use in the loop, and all other uses // are outside the loop. - UnorderedReductionNode* current = last_ur; - UnorderedReductionNode* first_ur = nullptr; + ReductionNode* current = last_ur; + ReductionNode* first_ur = nullptr; while (true) { - assert(current->is_UnorderedReduction(), "sanity"); + assert(!current->requires_strict_order(), "sanity"); // Expect no ctrl and a vector_input from within the loop. Node* ctrl = current->in(0); @@ -4409,7 +4420,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { break; // Chain traversal fails. } - // Expect single use of UnorderedReduction, except for last_ur. + // Expect single use of an unordered Reduction, except for last_ur. if (current == last_ur) { // Expect all uses to be outside the loop, except phi. for (DUIterator_Fast kmax, k = current->fast_outs(kmax); k < kmax; k++) { @@ -4427,12 +4438,13 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { } } - // Expect another UnorderedReduction or phi as the scalar input. + // Expect another unordered Reduction or phi as the scalar input. Node* scalar_input = current->in(1); - if (scalar_input->is_UnorderedReduction() && + if (is_unordered_reduction(scalar_input) && scalar_input->Opcode() == current->Opcode()) { - // Move up the UnorderedReduction chain. - current = scalar_input->as_UnorderedReduction(); + // Move up the unordered Reduction chain. + current = scalar_input->as_Reduction(); + assert(!current->requires_strict_order(), "must be"); } else if (scalar_input == phi) { // Chain terminates at phi. first_ur = current; @@ -4456,7 +4468,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { VectorNode* identity_vector = VectorNode::scalar2vector(identity_scalar, vector_length, bt_t); register_new_node(identity_vector, C->root()); assert(vec_t == identity_vector->vect_type(), "matching vector type"); - VectorNode::trace_new_vector(identity_vector, "UnorderedReduction"); + VectorNode::trace_new_vector(identity_vector, "Unordered Reduction"); // Turn the scalar phi into a vector phi. _igvn.rehash_node_delayed(phi); @@ -4465,7 +4477,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { phi->as_Type()->set_type(vec_t); _igvn.set_type(phi, vec_t); - // Traverse down the chain of UnorderedReductions, and replace them with vector_accumulators. + // Traverse down the chain of unordered Reductions, and replace them with vector_accumulators. current = first_ur; while (true) { // Create vector_accumulator to replace current. @@ -4474,11 +4486,12 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { VectorNode* vector_accumulator = VectorNode::make(vopc, last_vector_accumulator, vector_input, vec_t); register_new_node(vector_accumulator, cl); _igvn.replace_node(current, vector_accumulator); - VectorNode::trace_new_vector(vector_accumulator, "UnorderedReduction"); + VectorNode::trace_new_vector(vector_accumulator, "Unordered Reduction"); if (current == last_ur) { break; } - current = vector_accumulator->unique_out()->as_UnorderedReduction(); + current = vector_accumulator->unique_out()->as_Reduction(); + assert(!current->requires_strict_order(), "must be"); } // Create post-loop reduction. 
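To make the shape of this transformation concrete, here is a minimal Java sketch of the before/after loop structure (illustrative only, not part of this patch; the class and method names are hypothetical, and it uses the incubating Vector API, so it needs --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class UnorderedReductionShape {
        private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        // Shape before the optimization: a full cross-lane reduction on every
        // vectorized iteration (the in-loop ReductionNode).
        static int reducePerIteration(int[] a) {
            int acc = 0;
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                acc += IntVector.fromArray(SPECIES, a, i).reduceLanes(VectorOperators.ADD);
            }
            return acc; // scalar tail elided for brevity
        }

        // Shape after move_unordered_reduction_out_of_loop: a lane-wise
        // vector_accumulator in the loop and a single post_loop_reduction.
        static int accumulateThenReduce(int[] a) {
            IntVector acc = IntVector.zero(SPECIES); // identity vector
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                acc = acc.add(IntVector.fromArray(SPECIES, a, i));
            }
            return acc.reduceLanes(VectorOperators.ADD); // scalar tail elided
        }
    }

The two methods agree only because integer addition may be freely reassociated; after this change that legality test is expressed as !requires_strict_order() instead of the old is_UnorderedReduction() class query, which is why strictly ordered AddReductionVF/VD nodes are never moved out of the loop.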
@@ -4495,7 +4508,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { } } register_new_node(post_loop_reduction, get_late_ctrl(post_loop_reduction, cl)); - VectorNode::trace_new_vector(post_loop_reduction, "UnorderedReduction"); + VectorNode::trace_new_vector(post_loop_reduction, "Unordered Reduction"); assert(last_accumulator->outcnt() == 2, "last_accumulator has 2 uses: phi and post_loop_reduction"); assert(post_loop_reduction->outcnt() > 0, "should have taken over all non loop uses of last_accumulator"); diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index dc9dc6654b5..ae379c4833a 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -175,7 +175,6 @@ class SubTypeCheckNode; class Type; class TypeNode; class UnlockNode; -class UnorderedReductionNode; class VectorNode; class LoadVectorNode; class LoadVectorMaskedNode; @@ -739,7 +738,6 @@ class Node { DEFINE_CLASS_ID(ExpandV, Vector, 5) DEFINE_CLASS_ID(CompressM, Vector, 6) DEFINE_CLASS_ID(Reduction, Vector, 7) - DEFINE_CLASS_ID(UnorderedReduction, Reduction, 0) DEFINE_CLASS_ID(NegV, Vector, 8) DEFINE_CLASS_ID(Con, Type, 8) DEFINE_CLASS_ID(ConI, Con, 0) @@ -991,7 +989,6 @@ class Node { DEFINE_CLASS_QUERY(Sub) DEFINE_CLASS_QUERY(SubTypeCheck) DEFINE_CLASS_QUERY(Type) - DEFINE_CLASS_QUERY(UnorderedReduction) DEFINE_CLASS_QUERY(Vector) DEFINE_CLASS_QUERY(VectorMaskCmp) DEFINE_CLASS_QUERY(VectorUnbox) diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index d8a9b14c4ad..8ea456d3417 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -1321,7 +1321,7 @@ CodeBuffer* PhaseOutput::init_buffer() { int code_req = _buf_sizes._code; int const_req = _buf_sizes._const; - int pad_req = NativeCall::instruction_size; + int pad_req = NativeCall::byte_size(); BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); stub_req += bs->estimate_stub_size(); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 3ef6ae02534..b31f6ace5a6 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -1616,21 +1616,23 @@ bool LibraryCallKit::inline_vector_reduction() { } Node* init = ReductionNode::make_identity_con_scalar(gvn(), opc, elem_bt); - Node* value = nullptr; - if (mask == nullptr) { - assert(!is_masked_op, "Masked op needs the mask value never null"); - value = ReductionNode::make(opc, nullptr, init, opd, elem_bt); - } else { - if (use_predicate) { - value = ReductionNode::make(opc, nullptr, init, opd, elem_bt); - value->add_req(mask); - value->add_flag(Node::Flag_is_predicated_vector); - } else { - Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt))); - value = gvn().transform(new VectorBlendNode(reduce_identity, opd, mask)); - value = ReductionNode::make(opc, nullptr, init, value, elem_bt); - } + Node* value = opd; + + assert(mask != nullptr || !is_masked_op, "Masked op needs the mask value never null"); + if (mask != nullptr && !use_predicate) { + Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt))); + value = gvn().transform(new VectorBlendNode(reduce_identity, value, mask)); } + + // Make an unordered Reduction node. 
This affects only AddReductionVF/VD and MulReductionVF/VD, + // as these operations are allowed to be associative (not requiring strict order) in VectorAPI. + value = ReductionNode::make(opc, nullptr, init, value, elem_bt, /* requires_strict_order */ false); + + if (mask != nullptr && use_predicate) { + value->add_req(mask); + value->add_flag(Node::Flag_is_predicated_vector); + } + value = gvn().transform(value); Node* bits = nullptr; diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index b14a7f7b165..d560f112039 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1296,7 +1296,8 @@ int ReductionNode::opcode(int opc, BasicType bt) { } // Return the appropriate reduction node. -ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, BasicType bt) { +ReductionNode* ReductionNode::make(int opc, Node* ctrl, Node* n1, Node* n2, BasicType bt, + bool requires_strict_order) { int vopc = opcode(opc, bt); @@ -1306,17 +1307,17 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi switch (vopc) { case Op_AddReductionVI: return new AddReductionVINode(ctrl, n1, n2); case Op_AddReductionVL: return new AddReductionVLNode(ctrl, n1, n2); - case Op_AddReductionVF: return new AddReductionVFNode(ctrl, n1, n2); - case Op_AddReductionVD: return new AddReductionVDNode(ctrl, n1, n2); + case Op_AddReductionVF: return new AddReductionVFNode(ctrl, n1, n2, requires_strict_order); + case Op_AddReductionVD: return new AddReductionVDNode(ctrl, n1, n2, requires_strict_order); case Op_MulReductionVI: return new MulReductionVINode(ctrl, n1, n2); case Op_MulReductionVL: return new MulReductionVLNode(ctrl, n1, n2); - case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2); - case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2); - case Op_MinReductionV: return new MinReductionVNode(ctrl, n1, n2); - case Op_MaxReductionV: return new MaxReductionVNode(ctrl, n1, n2); - case Op_AndReductionV: return new AndReductionVNode(ctrl, n1, n2); - case Op_OrReductionV: return new OrReductionVNode(ctrl, n1, n2); - case Op_XorReductionV: return new XorReductionVNode(ctrl, n1, n2); + case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2, requires_strict_order); + case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2, requires_strict_order); + case Op_MinReductionV: return new MinReductionVNode (ctrl, n1, n2); + case Op_MaxReductionV: return new MaxReductionVNode (ctrl, n1, n2); + case Op_AndReductionV: return new AndReductionVNode (ctrl, n1, n2); + case Op_OrReductionV: return new OrReductionVNode (ctrl, n1, n2); + case Op_XorReductionV: return new XorReductionVNode (ctrl, n1, n2); default: assert(false, "unknown node: %s", NodeClassNames[vopc]); return nullptr; diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 17c7482d88c..6c5402eb511 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -203,7 +203,9 @@ class ReductionNode : public Node { init_class_id(Class_Reduction); } - static ReductionNode* make(int opc, Node* ctrl, Node* in1, Node* in2, BasicType bt); + static ReductionNode* make(int opc, Node* ctrl, Node* in1, Node* in2, BasicType bt, + // This only affects floating-point add and mul reductions. 
+ bool requires_strict_order = true); static int opcode(int opc, BasicType bt); static bool implemented(int opc, uint vlen, BasicType bt); // Make an identity scalar (zero for add, one for mul, etc) for scalar opc. @@ -225,47 +227,97 @@ class ReductionNode : public Node { // Needed for proper cloning. virtual uint size_of() const { return sizeof(*this); } -}; -//---------------------------UnorderedReductionNode------------------------------------- -// Order of reduction does not matter. Example int add. Not true for float add. -class UnorderedReductionNode : public ReductionNode { -public: - UnorderedReductionNode(Node * ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) { - init_class_id(Class_UnorderedReduction); + // Floating-point addition and multiplication are non-associative, so + // AddReductionVF/D and MulReductionVF/D require strict ordering + // in auto-vectorization. Vector API can generate AddReductionVF/D + // and MulReductionVF/VD without strict ordering, which can benefit + // some platforms. + // + // Other reductions don't need strict ordering. + virtual bool requires_strict_order() const { + return false; + } + +#ifndef PRODUCT + void dump_spec(outputStream* st) const { + if (requires_strict_order()) { + st->print("requires_strict_order"); + } else { + st->print("no_strict_order"); + } } +#endif }; //------------------------------AddReductionVINode-------------------------------------- // Vector add byte, short and int as a reduction -class AddReductionVINode : public UnorderedReductionNode { +class AddReductionVINode : public ReductionNode { public: - AddReductionVINode(Node * ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AddReductionVINode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------AddReductionVLNode-------------------------------------- // Vector add long as a reduction -class AddReductionVLNode : public UnorderedReductionNode { +class AddReductionVLNode : public ReductionNode { public: - AddReductionVLNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AddReductionVLNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------AddReductionVFNode-------------------------------------- // Vector add float as a reduction class AddReductionVFNode : public ReductionNode { +private: + // True if add reduction operation for floats requires strict ordering. + // As an example - The value is true when add reduction for floats is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. 
+ const bool _requires_strict_order; public: - AddReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + AddReductionVFNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------AddReductionVDNode-------------------------------------- // Vector add double as a reduction class AddReductionVDNode : public ReductionNode { +private: + // True if add reduction operation for doubles requires strict ordering. + // As an example - The value is true when add reduction for doubles is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - AddReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + AddReductionVDNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------SubVBNode-------------------------------------- @@ -400,34 +452,70 @@ class FmaVFNode : public FmaVNode { //------------------------------MulReductionVINode-------------------------------------- // Vector multiply byte, short and int as a reduction -class MulReductionVINode : public UnorderedReductionNode { +class MulReductionVINode : public ReductionNode { public: - MulReductionVINode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MulReductionVINode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MulReductionVLNode-------------------------------------- // Vector multiply long as a reduction -class MulReductionVLNode : public UnorderedReductionNode { +class MulReductionVLNode : public ReductionNode { public: - MulReductionVLNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MulReductionVLNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MulReductionVFNode-------------------------------------- // Vector multiply float as a reduction class MulReductionVFNode : public ReductionNode { + // True if mul reduction operation for floats requires strict ordering. 
+ // As an example - The value is true when mul reduction for floats is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - MulReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + MulReductionVFNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------MulReductionVDNode-------------------------------------- // Vector multiply double as a reduction class MulReductionVDNode : public ReductionNode { + // True if mul reduction operation for doubles requires strict ordering. + // As an example - The value is true when mul reduction for doubles is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - MulReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + MulReductionVDNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------DivVFNode-------------------------------------- @@ -753,9 +841,9 @@ class AndVNode : public VectorNode { //------------------------------AndReductionVNode-------------------------------------- // Vector and byte, short, int, long as a reduction -class AndReductionVNode : public UnorderedReductionNode { +class AndReductionVNode : public ReductionNode { public: - AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AndReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; @@ -770,9 +858,9 @@ class OrVNode : public VectorNode { //------------------------------OrReductionVNode-------------------------------------- // Vector or byte, short, int, long as a reduction -class OrReductionVNode : public UnorderedReductionNode { +class OrReductionVNode : public ReductionNode { public: - OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + OrReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; @@ -787,25 +875,25 @@ class 
XorVNode : public VectorNode { //------------------------------XorReductionVNode-------------------------------------- // Vector xor int, long as a reduction -class XorReductionVNode : public UnorderedReductionNode { +class XorReductionVNode : public ReductionNode { public: - XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + XorReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MinReductionVNode-------------------------------------- // Vector min byte, short, int, long, float, double as a reduction -class MinReductionVNode : public UnorderedReductionNode { +class MinReductionVNode : public ReductionNode { public: - MinReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MinReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MaxReductionVNode-------------------------------------- // Vector max byte, short, int, long, float, double as a reduction -class MaxReductionVNode : public UnorderedReductionNode { +class MaxReductionVNode : public ReductionNode { public: - MaxReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MaxReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; diff --git a/src/java.base/share/man/java.1 b/src/java.base/share/man/java.1 index 05b1ec6cdd8..4bd0306b2b8 100644 --- a/src/java.base/share/man/java.1 +++ b/src/java.base/share/man/java.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVA" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVA" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/java.base/share/man/keytool.1 b/src/java.base/share/man/keytool.1 index a61095d4504..63a134eb932 100644 --- a/src/java.base/share/man/keytool.1 +++ b/src/java.base/share/man/keytool.1 @@ -1,4 +1,4 @@ -.\" Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. +.\" Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. .\" DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. .\" .\" This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "KEYTOOL" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "KEYTOOL" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP @@ -1747,7 +1747,7 @@ risk. The \f[V]keytool\f[R] command supports these named extensions. The names aren\[aq]t case-sensitive. .TP -\f[V]BC\f[R] or \f[V]BasicContraints\f[R] +\f[V]BC\f[R] or \f[V]BasicConstraints\f[R] Values: .RS .PP diff --git a/src/java.rmi/share/man/rmiregistry.1 b/src/java.rmi/share/man/rmiregistry.1 index 29a2e2c965a..c168e1482a6 100644 --- a/src/java.rmi/share/man/rmiregistry.1 +++ b/src/java.rmi/share/man/rmiregistry.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "RMIREGISTRY" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "RMIREGISTRY" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/java.scripting/share/man/jrunscript.1 b/src/java.scripting/share/man/jrunscript.1 index 43029582b3e..59389c274d1 100644 --- a/src/java.scripting/share/man/jrunscript.1 +++ b/src/java.scripting/share/man/jrunscript.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JRUNSCRIPT" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JRUNSCRIPT" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.compiler/share/man/javac.1 b/src/jdk.compiler/share/man/javac.1 index de374e7d0c7..ebee0369238 100644 --- a/src/jdk.compiler/share/man/javac.1 +++ b/src/jdk.compiler/share/man/javac.1 @@ -1,4 +1,4 @@ -.\" Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. +.\" Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. .\" DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. .\" .\" This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVAC" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVAC" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.compiler/share/man/serialver.1 b/src/jdk.compiler/share/man/serialver.1 index cd437b7e9bf..bad14872ee6 100644 --- a/src/jdk.compiler/share/man/serialver.1 +++ b/src/jdk.compiler/share/man/serialver.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "SERIALVER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "SERIALVER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.hotspot.agent/share/man/jhsdb.1 b/src/jdk.hotspot.agent/share/man/jhsdb.1 index 30b2d3fe95c..5b65f7eafb4 100644 --- a/src/jdk.hotspot.agent/share/man/jhsdb.1 +++ b/src/jdk.hotspot.agent/share/man/jhsdb.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JHSDB" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JHSDB" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.httpserver/share/man/jwebserver.1 b/src/jdk.httpserver/share/man/jwebserver.1 index 3c48d81fc9c..4fbaf9dd09d 100644 --- a/src/jdk.httpserver/share/man/jwebserver.1 +++ b/src/jdk.httpserver/share/man/jwebserver.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JWEBSERVER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JWEBSERVER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jartool/share/man/jar.1 b/src/jdk.jartool/share/man/jar.1 index 49a028e0f29..2d983eb561e 100644 --- a/src/jdk.jartool/share/man/jar.1 +++ b/src/jdk.jartool/share/man/jar.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAR" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAR" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jartool/share/man/jarsigner.1 b/src/jdk.jartool/share/man/jarsigner.1 index ea8f686a4c8..d085efcfcd0 100644 --- a/src/jdk.jartool/share/man/jarsigner.1 +++ b/src/jdk.jartool/share/man/jarsigner.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JARSIGNER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JARSIGNER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.javadoc/share/man/javadoc.1 b/src/jdk.javadoc/share/man/javadoc.1 index 73d12efbf30..4e256a7ce38 100644 --- a/src/jdk.javadoc/share/man/javadoc.1 +++ b/src/jdk.javadoc/share/man/javadoc.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVADOC" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVADOC" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP @@ -218,7 +218,7 @@ summary is required. For more explicit control in any individual documentation comment, enclose the contents of the first sentence in a \f[V]{\[at]summary ...}\f[R] tag, or when applicable, in a -\[ga]{\[at]return ...} tag. +\f[V]{\[at]return ...}\f[R] tag. 
.RE .TP \f[V]-doclet\f[R] \f[I]class\f[R] @@ -522,7 +522,8 @@ Allow JavaScript in documentation comments, and options whose value is \f[I]html-code\f[R]. .TP \f[V]-author\f[R] -Includes the \f[V]\[at]author\f[R] text in the generated docs. +Includes the text of any \f[V]author\f[R] tags in the generated +documentation. .TP \f[V]-bottom\f[R] \f[I]html-code\f[R] Specifies the text to be placed at the bottom of each generated page. @@ -986,8 +987,8 @@ is used. .RE .TP \f[V]-nosince\f[R] -Omits from the generated documents the \f[V]Since\f[R] sections -associated with the \f[V]\[at]since\f[R] tags. +Omits from the generated documentation the \f[V]Since\f[R] sections +derived from any \f[V]since\f[R] tags. .TP \f[V]-notimestamp\f[R] Suppresses the time stamp, which is hidden in an HTML comment in the @@ -1020,9 +1021,6 @@ to the current working directory. .PP The file may be an HTML file, with a filename ending in \f[V].html\f[R], or a Markdown file, with a filename ending in \f[V].md\f[R]. -.PD 0 -.P -.PD If the file is an HTML file, the content for the overview documentation is taken from the \f[V]
<main>\f[R] element in the file, if one is present, or from the \f[V]<body>\f[R] element if there is no @@ -1213,10 +1211,11 @@ To access the generated Use page, go to the class or package and click the \f[B]USE\f[R] link in the navigation bar. .TP \f[V]-version\f[R] -Includes the version text in the generated docs. +Includes the text of any \f[V]version\f[R] tags in the generated +documentation. This text is omitted by default. -To find out what version of the \f[V]javadoc\f[R] tool you are using, -use the \f[V]--version\f[R] option (with two hyphens). +Note: To find out what version of the \f[V]javadoc\f[R] tool you are +using, use the \f[V]--version\f[R] option (with two hyphens). .TP \f[V]-windowtitle\f[R] \f[I]title\f[R] Specifies the title to be placed in the HTML \f[V]<title>\f[R] tag. diff --git a/src/jdk.jcmd/share/man/jcmd.1 b/src/jdk.jcmd/share/man/jcmd.1 index c88d2a22a41..2befaf5a949 100644 --- a/src/jdk.jcmd/share/man/jcmd.1 +++ b/src/jdk.jcmd/share/man/jcmd.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JCMD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JCMD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jinfo.1 b/src/jdk.jcmd/share/man/jinfo.1 index fc87807c6fe..49d8a852633 100644 --- a/src/jdk.jcmd/share/man/jinfo.1 +++ b/src/jdk.jcmd/share/man/jinfo.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JINFO" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JINFO" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jmap.1 b/src/jdk.jcmd/share/man/jmap.1 index 4865f43d1c4..42831aa68ac 100644 --- a/src/jdk.jcmd/share/man/jmap.1 +++ b/src/jdk.jcmd/share/man/jmap.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JMAP" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JMAP" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jps.1 b/src/jdk.jcmd/share/man/jps.1 index be506be2e82..cbc69872b60 100644 --- a/src/jdk.jcmd/share/man/jps.1 +++ b/src/jdk.jcmd/share/man/jps.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JPS" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JPS" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jstack.1 b/src/jdk.jcmd/share/man/jstack.1 index 9effcb0902f..933db5fc80d 100644 --- a/src/jdk.jcmd/share/man/jstack.1 +++ b/src/jdk.jcmd/share/man/jstack.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSTACK" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTACK" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jstat.1 b/src/jdk.jcmd/share/man/jstat.1 index dd7d3832908..22e111a812b 100644 --- a/src/jdk.jcmd/share/man/jstat.1 +++ b/src/jdk.jcmd/share/man/jstat.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSTAT" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTAT" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jconsole/share/man/jconsole.1 b/src/jdk.jconsole/share/man/jconsole.1 index ce1f948e6f1..ec70040acf4 100644 --- a/src/jdk.jconsole/share/man/jconsole.1 +++ b/src/jdk.jconsole/share/man/jconsole.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JCONSOLE" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JCONSOLE" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/javap.1 b/src/jdk.jdeps/share/man/javap.1 index f9ec998e0af..27b0a29d0ba 100644 --- a/src/jdk.jdeps/share/man/javap.1 +++ b/src/jdk.jdeps/share/man/javap.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JAVAP" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVAP" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/jdeprscan.1 b/src/jdk.jdeps/share/man/jdeprscan.1 index ac850cf78de..fc13f05c449 100644 --- a/src/jdk.jdeps/share/man/jdeprscan.1 +++ b/src/jdk.jdeps/share/man/jdeprscan.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDEPRSCAN" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDEPRSCAN" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/jdeps.1 b/src/jdk.jdeps/share/man/jdeps.1 index 53891441665..d3dde37e3b9 100644 --- a/src/jdk.jdeps/share/man/jdeps.1 +++ b/src/jdk.jdeps/share/man/jdeps.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDEPS" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDEPS" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdi/share/man/jdb.1 b/src/jdk.jdi/share/man/jdb.1 index 530c9bed6a1..88097ffeae4 100644 --- a/src/jdk.jdi/share/man/jdb.1 +++ b/src/jdk.jdi/share/man/jdb.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDB" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDB" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jfr/share/man/jfr.1 b/src/jdk.jfr/share/man/jfr.1 index c6a568582c0..71a487f558e 100644 --- a/src/jdk.jfr/share/man/jfr.1 +++ b/src/jdk.jfr/share/man/jfr.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JFR" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JFR" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jlink/share/man/jlink.1 b/src/jdk.jlink/share/man/jlink.1 index 1a0b79a39c1..9f4bf38ffa5 100644 --- a/src/jdk.jlink/share/man/jlink.1 +++ b/src/jdk.jlink/share/man/jlink.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JLINK" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JLINK" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jlink/share/man/jmod.1 b/src/jdk.jlink/share/man/jmod.1 index b26685615ed..4475505e524 100644 --- a/src/jdk.jlink/share/man/jmod.1 +++ b/src/jdk.jlink/share/man/jmod.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JMOD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JMOD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jpackage/share/man/jpackage.1 b/src/jdk.jpackage/share/man/jpackage.1 index f9848200059..13d9c41c31d 100644 --- a/src/jdk.jpackage/share/man/jpackage.1 +++ b/src/jdk.jpackage/share/man/jpackage.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JPACKAGE" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JPACKAGE" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jshell/share/man/jshell.1 b/src/jdk.jshell/share/man/jshell.1 index 28160bb49fd..6f478e57442 100644 --- a/src/jdk.jshell/share/man/jshell.1 +++ b/src/jdk.jshell/share/man/jshell.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSHELL" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSHELL" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jstatd/share/man/jstatd.1 b/src/jdk.jstatd/share/man/jstatd.1 index cbbaeaf49f2..4bd90104624 100644 --- a/src/jdk.jstatd/share/man/jstatd.1 +++ b/src/jdk.jstatd/share/man/jstatd.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JSTATD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTATD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp b/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp index 40df20ba964..102ede42de6 100644 --- a/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp +++ b/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp @@ -212,18 +212,6 @@ class TestGenCollectorPolicy { // depends on so many other configurable variables. These tests only try to // verify that there are some basic rules for NewSize honored by the policies. -// If NewSize has been ergonomically set, the collector policy -// should use it for min -// This test doesn't work with 64k pages. See JDK-8331675. -#if !defined(PPC) -TEST_VM(CollectorPolicy, young_min_ergo) { - TestGenCollectorPolicy::SetNewSizeErgo setter(20 * M); - TestGenCollectorPolicy::CheckYoungMin checker(20 * M); - - TestGenCollectorPolicy::TestWrapper::test(&setter, &checker); -} -#endif - // If NewSize has been ergonomically set, the collector policy // should use it for min but calculate the initial young size // using NewRatio. diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index 5da6ebaa6d5..78e09c0627a 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -122,6 +122,7 @@ serviceability/sa/TestRevPtrsForInvokeDynamic.java 8241235 generic-all serviceability/jvmti/ModuleAwareAgents/ThreadStart/MAAThreadStart.java 8225354 windows-all serviceability/jvmti/vthread/GetThreadStateMountedTest/GetThreadStateMountedTest.java 8318090,8318729 generic-all serviceability/jvmti/vthread/GetSetLocalTest/GetSetLocalTest.java 8286836 generic-all +serviceability/jvmti/vthread/CarrierThreadEventNotification/CarrierThreadEventNotification.java 8333681 generic-all serviceability/dcmd/gc/RunFinalizationTest.java 8227120 generic-all serviceability/sa/ClhsdbCDSCore.java 8267433 macosx-x64 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java new file mode 100644 index 00000000000..327e6e5e12d --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2024, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+package compiler.loopopts.superword;
+
+import compiler.lib.ir_framework.*;
+
+/*
+ * @test
+ * @bug 8320725
+ * @summary Ensure strictly ordered AddReductionVF/VD and MulReductionVF/VD nodes
+ *          are generated when these operations are auto-vectorized
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestVectorFPReduction
+ */
+
+public class TestVectorFPReduction {
+
+    final private static int SIZE = 1024;
+
+    private static double[] da = new double[SIZE];
+    private static double[] db = new double[SIZE];
+    private static float[] fa = new float[SIZE];
+    private static float[] fb = new float[SIZE];
+    private static float fresult;
+    private static double dresult;
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    @Test
+    @IR(failOn = {IRNode.ADD_REDUCTION_VF},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testAddReductionVF() {
+        float result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result += (fa[i] + fb[i]);
+        }
+        fresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.ADD_REDUCTION_VD},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testAddReductionVD() {
+        double result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result += (da[i] + db[i]);
+        }
+        dresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.MUL_REDUCTION_VF},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.MUL_REDUCTION_VF, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testMulReductionVF() {
+        float result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result *= (fa[i] + fb[i]);
+        }
+        fresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.MUL_REDUCTION_VD},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.MUL_REDUCTION_VD, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testMulReductionVD() {
+        double result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result *= (da[i] + db[i]);
+        }
+        dresult += result;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
new file mode 100644
index 00000000000..549d9aa5d49
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2024, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.ir_framework.*;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.FloatVector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+import java.util.Random;
+
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+
+/**
+ * @test
+ * @bug 8320725
+ * @library /test/lib /
+ * @summary Verify non-strictly ordered AddReductionVF/VD and MulReductionVF/VD
+ *          nodes are generated in VectorAPI
+ * @modules jdk.incubator.vector
+ * @run driver compiler.vectorapi.TestVectorAddMulReduction
+ */
+
+public class TestVectorAddMulReduction {
+
+    private static final int SIZE = 1024;
+    private static final Random RD = Utils.getRandomInstance();
+
+    private static float[] fa;
+    private static float fres;
+    private static double[] da;
+    private static double dres;
+
+    static {
+        fa = new float[SIZE];
+        da = new double[SIZE];
+        fres = 1;
+        dres = 1;
+        for (int i = 0; i < SIZE; i++) {
+            fa[i] = RD.nextFloat();
+            da[i] = RD.nextDouble();
+        }
+    }
+
+    // Test add reduction operation for floats
+    @ForceInline
+    public static void testFloatAddKernel(VectorSpecies SPECIES, float[] f) {
+        for (int i = 0; i < SPECIES.loopBound(f.length); i += SPECIES.length()) {
+            var av = FloatVector.fromArray(SPECIES, f, i);
+            fres += av.reduceLanes(VectorOperators.ADD);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=8"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_64() {
+        testFloatAddKernel(FloatVector.SPECIES_64, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_128() {
+        testFloatAddKernel(FloatVector.SPECIES_128, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_256() {
+        testFloatAddKernel(FloatVector.SPECIES_256, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_512() {
+        testFloatAddKernel(FloatVector.SPECIES_512, fa);
+    }
+
+    // Test add reduction operation for doubles
+    @ForceInline
+    public static void testDoubleAddKernel(VectorSpecies SPECIES, double[] d) {
+        for (int i = 0; i < SPECIES.loopBound(d.length); i += SPECIES.length()) {
+            var av = DoubleVector.fromArray(SPECIES, d, i);
+            dres += av.reduceLanes(VectorOperators.ADD);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_128() {
+        testDoubleAddKernel(DoubleVector.SPECIES_128, da);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_256() {
+        testDoubleAddKernel(DoubleVector.SPECIES_256, da);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_512() {
+        testDoubleAddKernel(DoubleVector.SPECIES_512, da);
+    }
+
+    // Test mul reduction operation for floats.
+    // On aarch64, there are no direct vector instructions for float/double mul
+    // reduction; scalar instructions are emitted for 64-bit/128-bit vectors.
+    // Thus MulReductionVF/VD nodes are generated only for vector lengths of
+    // 8B/16B in the Vector API.
+    @ForceInline
+    public static void testFloatMulKernel(VectorSpecies SPECIES, float[] f) {
+        for (int i = 0; i < SPECIES.loopBound(f.length); i += SPECIES.length()) {
+            var av = FloatVector.fromArray(SPECIES, f, i);
+            fres += av.reduceLanes(VectorOperators.MUL);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=8"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatMul_64() {
+        testFloatMulKernel(FloatVector.SPECIES_64, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatMul_128() {
+        testFloatMulKernel(FloatVector.SPECIES_128, fa);
+    }
+
+    // Test mul reduction operation for doubles
+    @ForceInline
+    public static void testDoubleMulKernel(VectorSpecies SPECIES, double[] d) {
+        for (int i = 0; i < SPECIES.loopBound(d.length); i += SPECIES.length()) {
+            var av = DoubleVector.fromArray(SPECIES, d, i);
+            dres += av.reduceLanes(VectorOperators.MUL);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleMul_128() {
+        testDoubleMulKernel(DoubleVector.SPECIES_128, da);
+    }
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
+    }
+}
diff --git a/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java b/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
index 5060e67ca87..65cc54793cb 100644
--- a/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
+++ b/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
@@ -25,6 +25,7 @@
 import java.io.File;
 import java.nio.file.Path;
 import java.lang.management.*;
+import java.util.zip.CRC32;
 
 import bootreporter.*;
 import jdk.test.lib.helpers.ClassFileInstaller;
@@ -96,6 +97,10 @@ private static void launchApp(final Path agentJar) throws Exception {
         final OutputAnalyzer oa = ProcessTools.executeTestJava(
                 "--enable-preview", // due to usage of ClassFile API PreviewFeature in the agent
                 "-javaagent:" + agentJar.toString(),
+                // We disable CheckIntrinsics because the NativeMethodPrefixAgent modifies
+                // the native method names, which then causes a failure in the VM check
+                // for the presence of an intrinsic on a @IntrinsicCandidate native method.
+                "-XX:+UnlockDiagnosticVMOptions", "-XX:-CheckIntrinsics",
                 NativeMethodPrefixApp.class.getName());
         oa.shouldHaveExitValue(0);
         // make available stdout/stderr in the logs, even in case of successful completion
@@ -109,6 +114,10 @@ private void run() throws Exception {
         java.lang.reflect.Array.getLength(new short[5]);
         RuntimeMXBean mxbean = ManagementFactory.getRuntimeMXBean();
         System.err.println(mxbean.getVmVendor());
+        // Simply load a class containing an @IntrinsicCandidate on a native method
+        // to exercise the VM code which verifies the presence of the intrinsic
+        // implementation for that method.
+        System.err.println(new CRC32());
 
         NativeMethodPrefixAgent.checkErrors();