From b4ae2b752230fb95dc8dd12564f4223771227fe2 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Mon, 17 Jun 2024 11:30:01 -0400 Subject: [PATCH] Disable nonrepro FMAs in convex BBL solver Three FMAs were introduced when the BBL convex L(:) computation were moved to a separate function. Parentheses have been added to disable these FMAs. Calculation of the cell width fractions `L` was consolidated and moved into a new function, `find_L_open_concave_analytic` (later renamed to `find_open_L_open_concave_trigonometric`). When compiled with Intel and using the following flags, -O2 -march=core-avx2 -fp-model source -qno-opt-dynamic-align additional FMAs are added to the function, specifically the following expressions: crv_3 = (Dp + Dm - 2.0*D_vel) L(K) = L0*(1.0 + ax2_3apb*L0) L(K) = apb_4a * (1.0 - 2.0 * cos( C1_3*acos(a2x48_apb3*vol_below(K) - 1.0) - C2pi_3) ) (Third expression FMA was added inside of `acos()` by the compiler) The crucial flag seems to be `-march=core-avx2` which controls the introduction of FMAs and explains why the problem went undetected. The legacy answers were restored by adding new parentheses to the above equations. Since this function is not generally considered reproducible and is provided for legacy support, the loss of performance is not considered to be an issue. --- src/parameterizations/vertical/MOM_set_viscosity.F90 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/parameterizations/vertical/MOM_set_viscosity.F90 b/src/parameterizations/vertical/MOM_set_viscosity.F90 index e59e5d99aa..d1d333b5ce 100644 --- a/src/parameterizations/vertical/MOM_set_viscosity.F90 +++ b/src/parameterizations/vertical/MOM_set_viscosity.F90 @@ -1178,7 +1178,8 @@ subroutine find_L_open_concave_trigonometric(vol_below, D_vel, Dp, Dm, L, GV) ! Each cell extends from x=-1/2 to 1/2, and has a topography ! given by D(x) = crv*x^2 + slope*x + D_vel - crv/12. - crv_3 = (Dp + Dm - 2.0*D_vel) ; crv = 3.0*crv_3 + !crv_3 = (Dp + Dm - 2.0*D_vel) ; crv = 3.0*crv_3 + crv_3 = (Dp + Dm - (2.0*D_vel)) ; crv = 3.0*crv_3 slope = Dp - Dm ! Calculate the volume above which the entire cell is open and the volume at which the @@ -1205,10 +1206,13 @@ subroutine find_L_open_concave_trigonometric(vol_below, D_vel, Dp, Dm, L, GV) ! vol_below(K) = 0.5*L^2*(slope + crv/3*(3-4L)). if (a2x48_apb3*vol_below(K) < 1e-8) then ! Could be 1e-7? ! There is a very good approximation here for massless layers. - L0 = sqrt(2.0*vol_below(K)*Iapb) ; L(K) = L0*(1.0 + ax2_3apb*L0) + !L0 = sqrt(2.0*vol_below(K)*Iapb) ; L(K) = L0*(1.0 + ax2_3apb*L0) + L0 = sqrt(2.0*vol_below(K)*Iapb) ; L(K) = L0*(1.0 + (ax2_3apb*L0)) else + !L(K) = apb_4a * (1.0 - & + ! 2.0 * cos(C1_3*acos(a2x48_apb3*vol_below(K) - 1.0) - C2pi_3)) L(K) = apb_4a * (1.0 - & - 2.0 * cos(C1_3*acos(a2x48_apb3*vol_below(K) - 1.0) - C2pi_3)) + 2.0 * cos(C1_3*acos((a2x48_apb3*vol_below(K)) - 1.0) - C2pi_3)) endif ! To check the answers. ! Vol_err = 0.5*(L(K)*L(K))*(slope + crv_3*(3.0-4.0*L(K))) - vol_below(K)