https://www.cdslab.org/paramonte/fortran/2
Current view: top level - test - test_pm_matrixMulAdd@routines.inc.F90 (source / functions) Hit Total Coverage
Test: ParaMonte 2.0.0 :: Serial Fortran - Code Coverage Report Lines: 468 468 100.0 %
Date: 2024-04-08 03:18:57 Functions: 143 143 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       2             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       3             : !!!!                                                                                                                            !!!!
       4             : !!!!    ParaMonte: Parallel Monte Carlo and Machine Learning Library.                                                           !!!!
       5             : !!!!                                                                                                                            !!!!
       6             : !!!!    Copyright (C) 2012-present, The Computational Data Science Lab                                                          !!!!
       7             : !!!!                                                                                                                            !!!!
       8             : !!!!    This file is part of the ParaMonte library.                                                                             !!!!
       9             : !!!!                                                                                                                            !!!!
      10             : !!!!    LICENSE                                                                                                                 !!!!
      11             : !!!!                                                                                                                            !!!!
      12             : !!!!       https://github.com/cdslaborg/paramonte/blob/main/LICENSE.md                                                          !!!!
      13             : !!!!                                                                                                                            !!!!
      14             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      15             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      16             : 
      17             : !>  \brief
      18             : !>  This include file contains procedure implementations of the tests of [pm_matrixMulAdd](@ref pm_matrixMulAdd).
      19             : !>
      20             : !>  \fintest
      21             : !>
      22             : !>  \author
      23             : !>  \FatemehBagheri, 12:27 AM Tuesday, February 22, 2022, Dallas, TX
      24             : 
      25             : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      26             : 
      27             : #if     IK_ENABLED
      28             : #define GET_CONJG(X) X
      29             : #define TYPE_KIND integer(IKC)
      30             :         TYPE_KIND   , parameter     :: lb = -10, ub = 10
      31             :         TYPE_KIND   , parameter     :: TOL = 0
      32             : #elif   CK_ENABLED
      33             : #define GET_CONJG(X) conjg(X)
      34             : #define TYPE_KIND complex(CKC)
      35             :         real(CKC)   , parameter     :: TOL = epsilon(0._CKC)**.66
      36             :         TYPE_KIND   , parameter     :: lb = (-1._CKC, -1._CKC), ub = (1._CKC, 1._CKC)
      37             : #elif   RK_ENABLED
      38             : #define GET_CONJG(X) X
      39             : #define TYPE_KIND real(RKC)
      40             :         real(RKC)   , parameter     :: TOL = epsilon(0._RKC)**.66
      41             :         TYPE_KIND   , parameter     :: lb = -1._RKC, ub = 1._RKC
      42             : #else
      43             : #error  "Unrecognized interface."
      44             : #endif
      45             :         logical(LK) :: renabled
      46             :         integer(IK) :: itry, begB, endB, begC, endC, irow, nrow, ncol, ndum, ndim, roffA, coffA, roffB, coffB, roffC, coffC, incB, incC
      47             :         integer(IK) , parameter     :: increments(*) = [(irow, irow = -3, -1), (irow, irow = 1, 3)]
      48             :         TYPE_KIND   , allocatable   :: matD(:,:), matT(:,:), matA(:,:), matB(:,:), matC(:,:), matO(:,:), matR(:,:)
      49             :         TYPE_KIND   , allocatable   :: vecA(:), vecB(:), vecC(:), vecO(:), vecR(:)
      50             :         TYPE_KIND   , allocatable   :: choices(:)
      51             :         TYPE_KIND   , parameter     :: ONE = 1, ZERO = 0
      52             :         TYPE_KIND                   :: alpha, beta
      53             : 
      54          13 :         assertion = .true._LK
      55             : 
      56        1313 :         do itry = 1, 100
      57             : 
      58             :             ! Build the matrices.
      59             : 
      60        1300 :             incB = getChoice(increments)
      61        1300 :             incC = getChoice(increments)
      62        1300 :             roffA = getUnifRand(0_IK, 3_IK)
      63        1300 :             coffA = getUnifRand(0_IK, 3_IK)
      64        1300 :             roffB = getUnifRand(0_IK, 3_IK)
      65        1300 :             coffB = getUnifRand(0_IK, 3_IK)
      66        1300 :             roffC = getUnifRand(0_IK, 3_IK)
      67        1300 :             coffC = getUnifRand(0_IK, 3_IK)
      68        1300 :             nrow = getUnifRand(0_IK, 10_IK)
      69        1300 :             ncol = getUnifRand(0_IK, 10_IK)
      70        1300 :             ndum = getUnifRand(0_IK, 10_IK)
      71        1300 :             ndim = getUnifRand(0_IK, 10_IK)
      72             : 
      73       93921 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + ndum)
      74       96664 :             matB = getUnifRand(lb, ub, 2 * roffB + ndum, 2 * coffB + ncol)
      75       95710 :             matC = getUnifRand(lb, ub, 2 * roffC + nrow, 2 * coffC + ncol)
      76             :             !print *, nrow, ndum, incB, incC
      77       14144 :             vecB = getUnifRand(lb, ub, max(0, 1 + (ndum - 1) * abs(incB)))
      78       14374 :             vecC = getUnifRand(lb, ub, max(0, 1 + (nrow - 1) * abs(incC)))
      79        1300 :             call setBegEnd()
      80             : 
      81        6500 :             choices = [ZERO, ONE, getUnifRand(lb, ub)]
      82        1300 :             alpha = getChoice(choices)
      83        1300 :             beta = getChoice(choices)
      84             : 
      85             :             ! BLAS - LEVEL 2: ?GEMV
      86             : 
      87       15488 :             vecO = vecC
      88       15488 :             vecR = vecC
      89        1300 :             renabled = nrow == 0_IK .or. ndum == 0_IK .or. (alpha == ZERO .and. beta == ONE)
      90             :             ! \bug
      91             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
      92             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
      93             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
      94             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha * matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), vecB(begB:endB:incB)) + beta * vecO(begC:endC:incC), renabled)
      95        1300 :             if (renabled) then
      96        1717 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
      97             :             else
      98      109068 :                 vecR(begC:endC:incC) = matmul(alpha * matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), vecB(begB:endB:incB)) + beta * vecO(begC:endC:incC)
      99             :             end if
     100             : 
     101        1300 :             if (alpha == ONE .and. beta == ONE .and. getUnifRand()) call testGEMV()
     102        1300 :             if (alpha == ONE .and. getUnifRand()) call testGEMV(beta = beta)
     103        1300 :             if (beta == ONE .and. getUnifRand()) call testGEMV(alpha)
     104        1300 :             call testGEMV(alpha, beta)
     105             : 
     106             :             ! BLAS - LEVEL 3: ?GEMM
     107             : 
     108       98240 :             matO = matC
     109       98240 :             matR = matO
     110        1300 :             renabled = nrow == 0_IK .or. ncol == 0_IK .or. ((alpha == ZERO .or. ndum == 0_IK) .and. beta == ONE)
     111        1300 :             if (.not. renabled) then
     112             :                 matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = & ! LCOV_EXCL_LINE
     113      433656 :                 matmul(alpha * matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)) + beta * matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)
     114             :             end if
     115             : 
     116        1300 :             if (alpha == ONE .and. beta == ONE .and. getUnifRand()) call testGEMM()
     117        1300 :             if (alpha == ONE .and. getUnifRand()) call testGEMM(beta = beta)
     118        1300 :             if (beta == ONE .and. getUnifRand()) call testGEMM(alpha)
     119        1300 :             call testGEMM(alpha, beta)
     120             : 
     121             :             ! BLAS - LEVEL 2: SHMV - Symmetric/Hermitian matrix.
     122             : 
     123        1300 :             ndim = ndum
     124        1300 :             incC = incB
     125        1300 :             renabled = ndim == 0_IK .or. (alpha == ZERO .and. beta == ONE)
     126       14144 :             vecC = getUnifRand(lb, ub, max(0, 1 + (ndim - 1) * abs(incC)))
     127       54078 :             matD = getUnifRand(lb, ub, ndim, ndim)
     128        1300 :             call setBegEnd()
     129             : #if         CK_ENABLED
     130        2373 :             do irow = 1, ndim
     131        2373 :                 matD(irow, irow) = matD(irow, irow)%re
     132             :             end do
     133             : #endif
     134        1300 :             if (alpha == ONE .and. beta == ONE .and. getUnifRand()) call testSHMV()
     135        1300 :             if (alpha == ONE .and. getUnifRand()) call testSHMV(beta = beta)
     136        1300 :             if (beta == ONE .and. getUnifRand()) call testSHMV(alpha)
     137        1300 :             call testSHMV(alpha, beta)
     138             : 
     139             :             ! BLAS - LEVEL 2: ?HPMV - Symmetric/Hermitian matrix.
     140             : 
     141        1300 :             ndim = ndum
     142        1300 :             incC = incB
     143        1300 :             renabled = ndim == 0_IK .or. (alpha == ZERO .and. beta == ONE)
     144       14144 :             vecC = getUnifRand(lb, ub, max(0, 1 + (ndim - 1) * abs(incC)))
     145       54078 :             matD = getUnifRand(lb, ub, ndim, ndim)
     146        1300 :             call setBegEnd()
     147             : #if         CK_ENABLED
     148        2373 :             do irow = 1, ndim
     149        2373 :                 matD(irow, irow) = matD(irow, irow)%re
     150             :             end do
     151             : #endif
     152        1300 :             if (alpha == ONE .and. beta == ONE .and. getUnifRand()) call testXPMV()
     153        1300 :             if (alpha == ONE .and. getUnifRand()) call testXPMV(beta = beta)
     154        1300 :             if (beta == ONE .and. getUnifRand()) call testXPMV(alpha)
     155        1300 :             call testXPMV(alpha, beta)
     156             : 
     157             :             ! BLAS - LEVEL 3: ?SHMM - Symmetric/Hermitian matrix.
     158             : 
     159       95710 :             matO = matC
     160       95710 :             matR = matO
     161        1300 :             renabled = nrow == 0_IK .or. ncol == 0_IK .or. (alpha == ZERO .and. beta == ONE)
     162             :             !if (.not. renabled) then
     163             :             !    matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = & ! LCOV_EXCL_LINE
     164             :             !    matmul(alpha * matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)) + beta * matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)
     165             :             !end if
     166        1300 :             if (alpha == ONE .and. beta == ONE .and. getUnifRand()) call testSHMM()
     167        1300 :             if (alpha == ONE .and. getUnifRand()) call testSHMM(beta = beta)
     168        1300 :             if (beta == ONE .and. getUnifRand()) call testSHMM(alpha)
     169        1313 :             call testSHMM(alpha, beta)
     170             : 
     171             :         end do
     172             : 
     173             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     174             : 
     175             :     contains
     176             : 
     177             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     178             : 
     179        3900 :         subroutine setBegEnd()
     180        3900 :             if (incB < 0) then
     181        1962 :                 begB = size(vecB)
     182        1962 :                 endB = 1
     183             :             else
     184        1938 :                 begB = 1
     185        1938 :                 endB = size(vecB)
     186             :             end if
     187        3900 :             if (incC < 0) then
     188        1933 :                 begC = size(vecC)
     189        1933 :                 endC = 1
     190             :             else
     191        1967 :                 begC = 1
     192        1967 :                 endC = size(vecC)
     193             :             end if
     194        3900 :         end subroutine
     195             : 
     196             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     197             : 
     198        1814 :         subroutine testGEMV(alpha, beta)
     199             :             TYPE_KIND, intent(in), optional :: alpha, beta
     200             :             logical(LK) :: simplified
     201        1814 :             simplified = .not. (present(alpha) .and. present(beta))
     202             :             ! nothing
     203       20040 :             vecO = vecC
     204       81871 :             call setMatMulAdd(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     205        1814 :             call reportGEMV(__LINE__, simplified, operationA = "nothing")
     206             :             ! transSymm
     207       20040 :             vecO = vecC
     208             :             ! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     209       56990 :             matD = transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum))
     210             :             !print *, "transSymm", shape(matD), shape(vecB(begB:endB:incB)), shape(vecO(begC:endC:incC))
     211       28834 :             call setMatMulAdd(matD, transSymm, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     212        1814 :             call reportGEMV(__LINE__, simplified, operationA = "transSymm")
     213             :             ! transHerm
     214       20040 :             vecO = vecC
     215       17959 :             matD = GET_CONJG(matD)
     216       28834 :             call setMatMulAdd(matD, transHerm, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     217        1814 :             call reportGEMV(__LINE__, simplified, operationA = "transHerm")
     218             :             ! contiguous
     219        1814 :             if (present(alpha) .and. present(beta)) then
     220             :                 ! nothing
     221       14374 :                 vecO = vecC
     222        1300 :                 call setMatMulAdd(matA, vecB, vecO, alpha, beta, nrow, ndum, roffA, coffA, incB, incC)
     223        1300 :                 call reportGEMV(__LINE__, simplified, operationA = "nothing")
     224             :                 ! transSymm
     225       14374 :                 vecO = vecC
     226       93988 :                 matD = transpose(matA)
     227        1300 :                 call setMatMulAdd(matD, transSymm, vecB, vecO, alpha, beta, ndum, nrow, coffA, roffA, incB, incC)
     228        1300 :                 call reportGEMV(__LINE__, simplified, operationA = "transSymm")
     229             :                 ! transHerm
     230       14374 :                 vecO = vecC
     231       28819 :                 matD = GET_CONJG(matD)
     232        1300 :                 call setMatMulAdd(matD, transHerm, vecB, vecO, alpha, beta, ndum, nrow, coffA, roffA, incB, incC)
     233        1300 :                 call reportGEMV(__LINE__, simplified, operationA = "transHerm")
     234             :             end if
     235        1814 :         end subroutine
     236             : 
     237        9342 :         subroutine reportGEMV(line, simplified, operationA)
     238             :             integer, intent(in) :: line
     239             :             character(*), intent(in) :: operationA
     240             :             logical(LK) , intent(in) :: simplified
     241             :             logical(LK), allocatable :: tolerable(:)
     242        9342 :             type(display_type) :: disp
     243             : #if         IK_ENABLED
     244       25698 :             tolerable = vecR(begC:endC:incC) == vecO(begC:endC:incC)
     245             : #elif       CK_ENABLED || RK_ENABLED
     246      114924 :             tolerable = isClose(vecR, vecO, abstol = TOL)
     247             : #else
     248             : #error      "Unrecognized interface."
     249             : #endif
     250       79545 :             assertion = assertion .and. all(tolerable)
     251        9342 :             if (test%traceable .and. .not. assertion) then
     252             :                 ! LCOV_EXCL_START
     253             :                 call disp%show("GEMV")
     254             :                 call disp%show("renabled")
     255             :                 call disp%show( renabled )
     256             :                 call disp%show("simplified")
     257             :                 call disp%show( simplified )
     258             :                 call disp%show("operationA")
     259             :                 call disp%show( operationA )
     260             :                 call disp%show("shape(matA)")
     261             :                 call disp%show( shape(matA) )
     262             :                 call disp%show("shape(vecB)")
     263             :                 call disp%show( shape(vecB) )
     264             :                 call disp%show("shape(vecC)")
     265             :                 call disp%show( shape(vecC) )
     266             :                 call disp%show("[nrow, ndum]")
     267             :                 call disp%show( [nrow, ndum] )
     268             :                 call disp%show("[roffA, coffA, roffB, coffB, roffC, coffC, incB, incC]")
     269             :                 call disp%show( [roffA, coffA, roffB, coffB, roffC, coffC, incB, incC] )
     270             :                 call disp%show("[alpha, beta]")
     271             :                 call disp%show( [alpha, beta] )
     272             :                 call disp%show("matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)")
     273             :                 call disp%show( matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum) )
     274             :                 call disp%show("vecB(begB:endB:incB)")
     275             :                 call disp%show( vecB(begB:endB:incB) )
     276             :                 call disp%show("vecC(begC:endC:incC)")
     277             :                 call disp%show( vecC(begC:endC:incC) )
     278             :                 call disp%show("vecO(begC:endC:incC)")
     279             :                 call disp%show( vecO(begC:endC:incC) )
     280             :                 call disp%show("vecR(begC:endC:incC)")
     281             :                 call disp%show( vecR(begC:endC:incC) )
     282             :                 call disp%show("vecB")
     283             :                 call disp%show( vecB )
     284             :                 call disp%show("vecC")
     285             :                 call disp%show( vecC )
     286             :                 call disp%show("vecO")
     287             :                 call disp%show( vecO )
     288             :                 call disp%show("vecR")
     289             :                 call disp%show( vecR )
     290             :                 call disp%show("tolerable")
     291             :                 call disp%show( tolerable )
     292             :                 call disp%show("pack(vecO, .not. tolerable)")
     293             :                 call disp%show( pack(vecO, .not. tolerable) )
     294             :                 call disp%show("pack(vecR, .not. tolerable)")
     295             :                 call disp%show( pack(vecR, .not. tolerable) )
     296             :                 call disp%show("pack(vecR - vecO, .not. tolerable)")
     297             :                 call disp%show( pack(vecR - vecO, .not. tolerable) )
     298             :                 call disp%show("TOL")
     299             :                 call disp%show( TOL )
     300             :                 ! LCOV_EXCL_STOP
     301             :             end if
     302        9342 :             call test%assert(assertion, SK_"GEMV test with the above specifications must successfully pass.", int(line, IK))
     303        9342 :         end subroutine
     304             : 
     305             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     306             : 
     307        1794 :         subroutine testGEMM(alpha, beta)
     308             :             TYPE_KIND, intent(in), optional :: alpha, beta
     309             :             logical(LK) :: simplified
     310        1794 :             simplified = .not. (present(alpha) .and. present(beta))
     311             :             ! nothing, nothing
     312      132486 :             matO = matC
     313      218310 :             call setMatMulAdd(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol), matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     314        1794 :             call reportGEMM(__LINE__, simplified, operationA = "nothing", operationB = "nothing")
     315             :             ! transSymm, nothing
     316      132486 :             matO = matC
     317       56759 :             matD = transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     318      165364 :             call setMatMulAdd(matD, transSymm, matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol), matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     319        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "nothing")
     320             :             ! transHerm, nothing
     321      132486 :             matO = matC
     322       56759 :             matD = GET_CONJG(transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)))
     323      165364 :             call setMatMulAdd(matD, transHerm, matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol), matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     324        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "nothing")
     325             :             ! nothing, transSymm
     326      132486 :             matO = matC
     327       57231 :             matT = transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     328      164356 :             call setMatMulAdd(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum), matT, transSymm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     329        1794 :             call reportGEMM(__LINE__, simplified, operationA = "nothing", operationB = "transSymm")
     330             :             ! transSymm, transSymm
     331      132486 :             matO = matC
     332       56759 :             matD = transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     333       57231 :             matT = transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     334      111410 :             call setMatMulAdd(matD, transSymm, matT, transSymm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     335        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "transSymm")
     336             :             ! transHerm, transSymm
     337      132486 :             matO = matC
     338       56759 :             matD = GET_CONJG(transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     339       57231 :             matT = transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol))! gfortran-11 bug: cannot pass transpose temporary array the second time for `transHerm`.
     340      111410 :             call setMatMulAdd(matD, transHerm, matT, transSymm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     341        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "transSymm")
     342             :             ! nothing, transHerm
     343      132486 :             matO = matC
     344       56534 :             matD = matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)
     345       57231 :             matT = GET_CONJG(transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)))
     346      111410 :             call setMatMulAdd(matD, matT, transHerm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     347        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "transHerm")
     348             :             ! transSymm, transHerm
     349      132486 :             matO = matC
     350       56759 :             matD = transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum))
     351       57231 :             matT = GET_CONJG(transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)))
     352      111410 :             call setMatMulAdd(matD, transSymm, matT, transHerm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     353        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "transHerm")
     354             :             ! transHerm, transHerm
     355      132486 :             matO = matC
     356       56759 :             matD = GET_CONJG(transpose(matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)))
     357       57231 :             matT = GET_CONJG(transpose(matB(roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)))
     358      111410 :             call setMatMulAdd(matD, transHerm, matT, transHerm, matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol), alpha, beta)
     359        1794 :             call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "transHerm")
     360             :             ! contiguous
     361        1794 :             if (present(alpha) .and. present(beta)) then
     362             :                 ! nothing, nothing
     363       95710 :                 matO = matC
     364        1300 :                 call setMatMulAdd(matA, matB, matO, alpha, beta, nrow, ncol, ndum, roffA, coffA, roffB, coffB, roffC, coffC)
     365        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "nothing", operationB = "nothing")
     366             :                 ! transSymm, nothing
     367       95710 :                 matO = matC
     368       93988 :                 matD = transpose(matA)
     369        1300 :                 call setMatMulAdd(matD, transSymm, matB, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, roffB, coffB, roffC, coffC)
     370        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "nothing")
     371             :                 ! transHerm, nothing
     372       95710 :                 matO = matC
     373       28819 :                 matD = GET_CONJG(matD)
     374        1300 :                 call setMatMulAdd(matD, transHerm, matB, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, roffB, coffB, roffC, coffC)
     375        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "nothing")
     376             :                 ! nothing, transSymm
     377       95710 :                 matO = matC
     378       96483 :                 matT = transpose(matB)
     379        1300 :                 call setMatMulAdd(matA, matT, transSymm, matO, alpha, beta, nrow, ncol, ndum, roffA, coffA, coffB, roffB, roffC, coffC)
     380        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "nothing", operationB = "transSymm")
     381             :                 ! transSymm, transSymm
     382       95710 :                 matO = matC
     383       93988 :                 matD = transpose(matA)
     384       96483 :                 matT = transpose(matB)
     385        1300 :                 call setMatMulAdd(matD, transSymm, matT, transSymm, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, coffB, roffB, roffC, coffC)
     386        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "transSymm")
     387             :                 ! transHerm, transSymm
     388       95710 :                 matO = matC
     389       93988 :                 matD = GET_CONJG(transpose(matA))
     390       96483 :                 matT = transpose(matB)
     391        1300 :                 call setMatMulAdd(matD, transHerm, matT, transSymm, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, coffB, roffB, roffC, coffC)
     392        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "transSymm")
     393             :                 ! nothing, transHerm
     394       95710 :                 matO = matC
     395       96483 :                 matT = GET_CONJG(transpose(matB))
     396        1300 :                 call setMatMulAdd(matA, matT, transHerm, matO, alpha, beta, nrow, ncol, ndum, roffA, coffA, coffB, roffB, roffC, coffC)
     397        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "nothing", operationB = "transHerm")
     398             :                 ! transSymm, transHerm
     399       95710 :                 matO = matC
     400       93988 :                 matD = transpose(matA)
     401       96483 :                 matT = GET_CONJG(transpose(matB))
     402        1300 :                 call setMatMulAdd(matD, transSymm, matT, transHerm, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, coffB, roffB, roffC, coffC)
     403        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transSymm", operationB = "transHerm")
     404             :                 ! transHerm, transHerm
     405       95710 :                 matO = matC
     406       93988 :                 matD = GET_CONJG(transpose(matA))
     407       96483 :                 matT = GET_CONJG(transpose(matB))
     408        1300 :                 call setMatMulAdd(matD, transHerm, matT, transHerm, matO, alpha, beta, nrow, ncol, ndum, coffA, roffA, coffB, roffB, roffC, coffC)
     409        1300 :                 call reportGEMM(__LINE__, simplified, operationA = "transHerm", operationB = "transHerm")
     410             :             end if
     411        1794 :         end subroutine
     412             : 
     413       27846 :         subroutine reportGEMM(line, simplified, operationA, operationB)
     414             :             integer, intent(in) :: line
     415             :             character(*), intent(in) :: operationA, operationB
     416             :             logical(LK) , intent(in) :: simplified
     417             :             logical(LK), allocatable :: tolerable(:,:)
     418       27846 :             type(display_type) :: disp
     419             : #if         IK_ENABLED
     420      789066 :             tolerable = matR == matO
     421             : #elif       CK_ENABLED || RK_ENABLED
     422     2494764 :             tolerable = isClose(matR, matO, abstol = TOL)
     423             : #else
     424             : #error      "Unrecognized interface."
     425             : #endif
     426     2025918 :             assertion = assertion .and. all(tolerable)
     427       27846 :             if (test%traceable .and. .not. assertion) then
     428             :                 ! LCOV_EXCL_START
     429             :                 call disp%show("GEMM")
     430             :                 call disp%show("renabled")
     431             :                 call disp%show( renabled )
     432             :                 call disp%show("simplified")
     433             :                 call disp%show( simplified )
     434             :                 call disp%show("operationA")
     435             :                 call disp%show( operationA )
     436             :                 call disp%show("operationB")
     437             :                 call disp%show( operationB )
     438             :                 call disp%show("shape(matA)")
     439             :                 call disp%show( shape(matA) )
     440             :                 call disp%show("shape(matB)")
     441             :                 call disp%show( shape(matB) )
     442             :                 call disp%show("shape(matC)")
     443             :                 call disp%show( shape(matC) )
     444             :                 call disp%show("[nrow, ndum, ncol]")
     445             :                 call disp%show( [nrow, ndum, ncol] )
     446             :                 call disp%show("[roffA, coffA, roffB, coffB, roffC, coffC]")
     447             :                 call disp%show( [roffA, coffA, roffB, coffB, roffC, coffC] )
     448             :                 call disp%show("[alpha, beta]")
     449             :                 call disp%show( [alpha, beta] )
     450             :                 call disp%show("matA") ! (roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)
     451             :                 call disp%show( matA ) ! (roffA + 1 : roffA + nrow, coffA + 1 : coffA + ndum)
     452             :                 call disp%show("matB") ! (roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)
     453             :                 call disp%show( matB ) ! (roffB + 1 : roffB + ndum, coffB + 1 : coffB + ncol)
     454             :                 call disp%show("matC(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     455             :                 call disp%show( matC(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     456             :                 call disp%show("matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     457             :                 call disp%show( matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     458             :                 call disp%show("matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     459             :                 call disp%show( matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     460             :                 call disp%show("matR - matO")
     461             :                 call disp%show( matR - matO )
     462             :                 call disp%show("tolerable")
     463             :                 call disp%show( tolerable )
     464             :                 call disp%show("pack(matO, .not. tolerable)")
     465             :                 call disp%show( pack(matO, .not. tolerable) )
     466             :                 call disp%show("pack(matR, .not. tolerable)")
     467             :                 call disp%show( pack(matR, .not. tolerable) )
     468             :                 call disp%show("pack(matR - matO, .not. tolerable)")
     469             :                 call disp%show( pack(matR - matO, .not. tolerable) )
     470             :                 call disp%show("TOL")
     471             :                 call disp%show( TOL )
     472             :                 ! LCOV_EXCL_STOP
     473             :             end if
     474       27846 :             call test%assert(assertion, SK_"GEMV test with the above specifications must successfully pass.", int(line, IK))
     475       27846 :         end subroutine
     476             : 
     477             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     478             : 
     479        1829 :         subroutine testSHMV(alpha, beta)
     480             :             TYPE_KIND, intent(in), optional :: alpha, beta
     481             :             TYPE_KIND :: alpha_def, beta_def
     482             :             logical(LK) :: simplified
     483         849 :             alpha_def = 1; beta_def = 1
     484        1829 :             if (present(beta)) beta_def = beta
     485        1829 :             if (present(alpha)) alpha_def = alpha
     486        1829 :             simplified = .not. (present(alpha) .and. present(beta))
     487             :             ! symmetric uppDia
     488       20804 :             vecO = vecC
     489       20804 :             vecR = vecC ! this initial assignment is important.
     490        1829 :             call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transSymm)
     491             :             ! \bug
     492             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     493             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     494             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     495             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     496        1829 :             if (renabled) then
     497        1485 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     498             :             else
     499      200065 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     500             :             end if
     501      146158 :             matT = getMatCopy(rdpack, matD, rdpack, uppDia)
     502       28583 :             call setMatMulAdd(matT, symmetric, uppDia, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     503        1829 :             call reportSHMV(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia")
     504        1829 :             if (.not. simplified) then ! contiguous
     505       14144 :                 vecO = vecC
     506        3900 :                 call setRebilled(matT, fill = ZERO, lb = 1 - [roffA, coffA], ub = shape(matT))
     507        1300 :                 call setMatMulAdd(matT, symmetric, uppDia, vecB, vecO, alpha, beta, ndim, roffA, coffA, incB, incC)
     508        1300 :                 call reportSHMV(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia")
     509             :             end if
     510             :             ! symmetric lowDia
     511       19689 :             vecO = vecC
     512       19689 :             vecR = vecC ! this initial assignment is important.
     513        1829 :             call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transSymm)
     514             :             ! \bug
     515             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     516             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     517             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     518             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     519        1829 :             if (renabled) then
     520        1485 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     521             :             else
     522      200065 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     523             :             end if
     524      146158 :             matT = getMatCopy(rdpack, matD, rdpack, lowDia)
     525       28583 :             call setMatMulAdd(matT, symmetric, lowDia, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     526        1829 :             call reportSHMV(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia")
     527        1829 :             if (.not. simplified) then ! contiguous
     528       14144 :                 vecO = vecC
     529        3900 :                 call setRebilled(matT, fill = ZERO, lb = 1 - [roffA, coffA], ub = shape(matT))
     530        1300 :                 call setMatMulAdd(matT, symmetric, lowDia, vecB, vecO, alpha, beta, ndim, roffA, coffA, incB, incC)
     531        1300 :                 call reportSHMV(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia")
     532             :             end if
     533             :             ! hermitian uppDia
     534       19689 :             vecO = vecC
     535       19689 :             vecR = vecC ! this initial assignment is important.
     536        1829 :             call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transHerm)
     537             :             ! \bug
     538             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     539             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     540             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     541             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     542        1829 :             if (renabled) then
     543        1485 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     544             :             else
     545      200065 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     546             :             end if
     547      146158 :             matT = getMatCopy(rdpack, matD, rdpack, uppDia)
     548       28583 :             call setMatMulAdd(matT, hermitian, uppDia, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     549        1829 :             call reportSHMV(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia")
     550        1829 :             if (.not. simplified) then ! contiguous
     551       14144 :                 vecO = vecC
     552        3900 :                 call setRebilled(matT, fill = ZERO, lb = 1 - [roffA, coffA], ub = shape(matT))
     553        1300 :                 call setMatMulAdd(matT, hermitian, uppDia, vecB, vecO, alpha, beta, ndim, roffA, coffA, incB, incC)
     554        1300 :                 call reportSHMV(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia")
     555             :             end if
     556             :             ! hermitian lowDia
     557       19689 :             vecO = vecC
     558       19689 :             vecR = vecC ! this initial assignment is important.
     559        1829 :             call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transHerm)
     560             :             ! \bug
     561             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     562             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     563             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     564             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     565        1829 :             if (renabled) then
     566        1485 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     567             :             else
     568      200065 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     569             :             end if
     570      146158 :             matT = getMatCopy(rdpack, matD, rdpack, lowDia)
     571       28583 :             call setMatMulAdd(matT, hermitian, lowDia, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     572        1829 :             call reportSHMV(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia")
     573        1829 :             if (.not. simplified) then ! contiguous
     574       14144 :                 vecO = vecC
     575        3900 :                 call setRebilled(matT, fill = ZERO, lb = 1 - [roffA, coffA], ub = shape(matT))
     576        1300 :                 call setMatMulAdd(matT, hermitian, lowDia, vecB, vecO, alpha, beta, ndim, roffA, coffA, incB, incC)
     577        1300 :                 call reportSHMV(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia")
     578             :             end if
     579        1829 :         end subroutine
     580             : 
     581       12516 :         subroutine reportSHMV(line, simplified, classA, subsetA)
     582             :             integer, intent(in) :: line
     583             :             character(*), intent(in) :: classA, subsetA
     584             :             logical(LK) , intent(in) :: simplified
     585             :             logical(LK), allocatable :: tolerable(:)
     586       12516 :             type(display_type) :: disp
     587             : #if         IK_ENABLED
     588       50372 :             tolerable = vecR == vecO
     589             : #elif       CK_ENABLED || RK_ENABLED
     590      154608 :             tolerable = isClose(vecR, vecO, abstol = TOL)
     591             : #else
     592             : #error      "Unrecognized interface."
     593             : #endif
     594      122816 :             assertion = assertion .and. all(tolerable)
     595       12516 :             if (test%traceable .and. .not. assertion) then
     596             :                 ! LCOV_EXCL_START
     597             :                 call disp%show("SHMV")
     598             :                 call disp%show("renabled")
     599             :                 call disp%show( renabled )
     600             :                 call disp%show("simplified")
     601             :                 call disp%show( simplified )
     602             :                 call disp%show("classA")
     603             :                 call disp%show( classA )
     604             :                 call disp%show("subsetA")
     605             :                 call disp%show( subsetA )
     606             :                 call disp%show("shape(matT)")
     607             :                 call disp%show( shape(matT) )
     608             :                 call disp%show("shape(vecB)")
     609             :                 call disp%show( shape(vecB) )
     610             :                 call disp%show("shape(vecC)")
     611             :                 call disp%show( shape(vecC) )
     612             :                 call disp%show("[roffA, coffA]")
     613             :                 call disp%show( [roffA, coffA] )
     614             :                 call disp%show("[ndim, incB, incC]")
     615             :                 call disp%show( [ndim, incB, incC] )
     616             :                 call disp%show("[alpha, beta]")
     617             :                 call disp%show( [alpha, beta] )
     618             :                 call disp%show("matD")
     619             :                 call disp%show( matD )
     620             :                 call disp%show("matT")
     621             :                 call disp%show( matT )
     622             :                 call disp%show("vecB(begB:endB:incB)")
     623             :                 call disp%show( vecB(begB:endB:incB) )
     624             :                 call disp%show("vecC(begC:endC:incC)")
     625             :                 call disp%show( vecC(begC:endC:incC) )
     626             :                 call disp%show("vecO(begC:endC:incC)")
     627             :                 call disp%show( vecO(begC:endC:incC) )
     628             :                 call disp%show("vecR(begC:endC:incC)")
     629             :                 call disp%show( vecR(begC:endC:incC) )
     630             :                 call disp%show("vecR(begC:endC:incC) - vecO(begC:endC:incC)")
     631             :                 call disp%show( vecR(begC:endC:incC) - vecO(begC:endC:incC) )
     632             :                 call disp%show("tolerable")
     633             :                 call disp%show( tolerable )
     634             :                 call disp%show("pack(vecO, .not. tolerable)")
     635             :                 call disp%show( pack(vecO, .not. tolerable) )
     636             :                 call disp%show("pack(vecR, .not. tolerable)")
     637             :                 call disp%show( pack(vecR, .not. tolerable) )
     638             :                 call disp%show("pack(vecR - vecO, .not. tolerable)")
     639             :                 call disp%show( pack(vecR - vecO, .not. tolerable) )
     640             :                 call disp%show("TOL")
     641             :                 call disp%show( TOL )
     642             :                 ! LCOV_EXCL_STOP
     643             :             end if
     644       12516 :             call test%assert(assertion, SK_"SHMV test with the above specifications must successfully pass.", int(line, IK))
     645       12516 :         end subroutine
     646             : 
     647             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     648             : 
     649        1846 :         subroutine testSHMM(alpha, beta)
     650             :             TYPE_KIND, intent(in), optional :: alpha, beta
     651             :             TYPE_KIND :: alpha_def, beta_def
     652             :             logical(LK) :: simplified
     653         852 :             alpha_def = 1; beta_def = 1
     654        1846 :             if (present(beta)) beta_def = beta
     655        1846 :             if (present(alpha)) alpha_def = alpha
     656        1846 :             simplified = .not. (present(alpha) .and. present(beta))
     657             :             !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     658             :             ! matA symmetric uppDia
     659      137003 :             matO = matC
     660      137003 :             matR = matC ! this initial assignment is important.
     661      155764 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + nrow)
     662      140263 :             matB = getUnifRand(lb, ub, 2 * roffB + nrow, 2 * coffB + ncol)
     663             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow), sliceB => matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     664       79904 :                 matD = sliceA
     665        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transSymm)
     666      809582 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * matD, sliceB) + beta_def * sliceO
     667        3692 :                 call setMatMulAdd(sliceA, symmetric, uppDia, sliceB, sliceO, alpha, beta)
     668             :             end associate
     669        1846 :             call reportSHMM(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia", classB = "matrix", subsetB = "uppLowDia")
     670        1846 :             if (.not. simplified) then ! contiguous
     671       95710 :                 matO = matC
     672        1300 :                 call setMatMulAdd(matA, symmetric, uppDia, matB, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     673        1300 :                 call reportSHMM(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia", classB = "matrix", subsetB = "uppLowDia")
     674             :             end if
     675             :             ! matA symmetric lowDia
     676      137003 :             matO = matC
     677      137003 :             matR = matC ! this initial assignment is important.
     678      155764 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + nrow)
     679      140263 :             matB = getUnifRand(lb, ub, 2 * roffB + nrow, 2 * coffB + ncol)
     680             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow), sliceB => matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     681       79904 :                 matD = sliceA
     682        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transSymm)
     683      809582 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * matD, sliceB) + beta_def * sliceO
     684        1846 :                 call setMatMulAdd(sliceA, symmetric, lowDia, sliceB, sliceO, alpha, beta)
     685        3692 :                 call reportSHMM(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia", classB = "matrix", subsetB = "uppLowDia")
     686             :             end associate
     687        1846 :             if (.not. simplified) then ! contiguous
     688       95710 :                 matO = matC
     689        1300 :                 call setMatMulAdd(matA, symmetric, lowDia, matB, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     690        1300 :                 call reportSHMM(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia", classB = "matrix", subsetB = "uppLowDia")
     691             :             end if
     692             :             ! matA hermitian uppDia
     693      137003 :             matO = matC
     694      137003 :             matR = matC ! this initial assignment is important.
     695      155764 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + nrow)
     696             : #if         CK_ENABLED
     697        3597 :             do irow = 1, nrow; matA(roffA + irow, coffA + irow) = matA(roffA + irow, coffA + irow)%re; end do
     698             : #endif
     699      140263 :             matB = getUnifRand(lb, ub, 2 * roffB + nrow, 2 * coffB + ncol)
     700             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow), sliceB => matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     701       79904 :                 matD = sliceA
     702        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transHerm)
     703      809582 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * matD, sliceB) + beta_def * sliceO
     704        3692 :                 call setMatMulAdd(sliceA, hermitian, uppDia, sliceB, sliceO, alpha, beta)
     705             :             end associate
     706        1846 :             call reportSHMM(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia", classB = "matrix", subsetB = "uppLowDia")
     707        1846 :             if (.not. simplified) then ! contiguous
     708       95710 :                 matO = matC
     709        1300 :                 call setMatMulAdd(matA, hermitian, uppDia, matB, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     710        1300 :                 call reportSHMM(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia", classB = "matrix", subsetB = "uppLowDia")
     711             :             end if
     712             :             ! matA hermitian lowDia
     713      137003 :             matO = matC
     714      137003 :             matR = matC ! this initial assignment is important.
     715      155764 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + nrow)
     716             : #if         CK_ENABLED
     717        3597 :             do irow = 1, nrow; matA(roffA + irow, coffA + irow) = matA(roffA + irow, coffA + irow)%re; end do
     718             : #endif
     719      140263 :             matB = getUnifRand(lb, ub, 2 * roffB + nrow, 2 * coffB + ncol)
     720             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow), sliceB => matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     721       79904 :                 matD = sliceA
     722        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transHerm)
     723      809582 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * matD, sliceB) + beta_def * sliceO
     724        3692 :                 call setMatMulAdd(sliceA, hermitian, lowDia, sliceB, sliceO, alpha, beta)
     725             :             end associate
     726        1846 :             call reportSHMM(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia", classB = "matrix", subsetB = "uppLowDia")
     727        1846 :             if (.not. simplified) then ! contiguous
     728       95710 :                 matO = matC
     729        1300 :                 call setMatMulAdd(matA, hermitian, lowDia, matB, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     730        1300 :                 call reportSHMM(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia", classB = "matrix", subsetB = "uppLowDia")
     731             :             end if
     732             :             !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     733             :             ! matB symmetric uppDia
     734      137003 :             matO = matC
     735      137003 :             matR = matC ! this initial assignment is important.
     736      136325 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + ncol)
     737      157890 :             matB = getUnifRand(lb, ub, 2 * roffB + ncol, 2 * coffB + ncol)
     738             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ncol), sliceB => matB(roffB + 1 : roffB + ncol, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     739       78218 :                 matD = sliceB
     740        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transSymm)
     741     1381632 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * sliceA, matD) + beta_def * sliceO
     742        3692 :                 call setMatMulAdd(sliceA, sliceB, symmetric, uppDia, sliceO, alpha, beta)
     743             :             end associate
     744        1846 :             call reportSHMM(__LINE__, simplified, classB = "symmetric", subsetB = "uppDia", classA = "matrix", subsetA = "uppLowDia")
     745        1846 :             if (.not. simplified) then ! contiguous
     746       95710 :                 matO = matC
     747        1300 :                 call setMatMulAdd(matA, matB, symmetric, uppDia, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     748        1300 :                 call reportSHMM(__LINE__, simplified, classB = "symmetric", subsetB = "uppDia", classA = "matrix", subsetA = "uppLowDia")
     749             :             end if
     750             :             ! matB symmetric lowDia
     751      137003 :             matO = matC
     752      137003 :             matR = matC ! this initial assignment is important.
     753      136325 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + ncol)
     754      157890 :             matB = getUnifRand(lb, ub, 2 * roffB + ncol, 2 * coffB + ncol)
     755             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ncol), sliceB => matB(roffB + 1 : roffB + ncol, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     756       78218 :                 matD = sliceB
     757        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transSymm)
     758     1381632 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * sliceA, matD) + beta_def * sliceO
     759        1846 :                 call setMatMulAdd(sliceA, sliceB, symmetric, lowDia, sliceO, alpha, beta)
     760        3692 :                 call reportSHMM(__LINE__, simplified, classB = "symmetric", subsetB = "lowDia", classA = "matrix", subsetA = "uppLowDia")
     761             :             end associate
     762        1846 :             if (.not. simplified) then ! contiguous
     763       95710 :                 matO = matC
     764        1300 :                 call setMatMulAdd(matA, matB, symmetric, lowDia, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     765        1300 :                 call reportSHMM(__LINE__, simplified, classB = "symmetric", subsetB = "lowDia", classA = "matrix", subsetA = "uppLowDia")
     766             :             end if
     767             :             ! matB hermitian uppDia
     768      137003 :             matO = matC
     769      137003 :             matR = matC ! this initial assignment is important.
     770      136325 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + ncol)
     771      157890 :             matB = getUnifRand(lb, ub, 2 * roffB + ncol, 2 * coffB + ncol)
     772             : #if         CK_ENABLED
     773        3401 :             do irow = 1, ncol; matB(roffB + irow, coffB + irow) = matB(roffB + irow, coffB + irow)%re; end do
     774             : #endif
     775             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ncol), sliceB => matB(roffB + 1 : roffB + ncol, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     776       78218 :                 matD = sliceB
     777        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transHerm)
     778     1381632 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * sliceA, matD) + beta_def * sliceO
     779        3692 :                 call setMatMulAdd(sliceA, sliceB, hermitian, uppDia, sliceO, alpha, beta)
     780             :             end associate
     781        1846 :             call reportSHMM(__LINE__, simplified, classB = "hermitian", subsetB = "uppDia", classA = "matrix", subsetA = "uppLowDia")
     782        1846 :             if (.not. simplified) then ! contiguous
     783       95710 :                 matO = matC
     784        1300 :                 call setMatMulAdd(matA, matB, hermitian, uppDia, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     785        1300 :                 call reportSHMM(__LINE__, simplified, classB = "hermitian", subsetB = "uppDia", classA = "matrix", subsetA = "uppLowDia")
     786             :             end if
     787             :             ! matB hermitian lowDia
     788      137003 :             matO = matC
     789      137003 :             matR = matC ! this initial assignment is important.
     790      136325 :             matA = getUnifRand(lb, ub, 2 * roffA + nrow, 2 * coffA + ncol)
     791      157890 :             matB = getUnifRand(lb, ub, 2 * roffB + ncol, 2 * coffB + ncol)
     792             : #if         CK_ENABLED
     793        3401 :             do irow = 1, ncol; matB(roffB + irow, coffB + irow) = matB(roffB + irow, coffB + irow)%re; end do
     794             : #endif
     795             :             associate(sliceA => matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + ncol), sliceB => matB(roffB + 1 : roffB + ncol, coffB + 1 : coffB + ncol), sliceO => matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol))
     796       78218 :                 matD = sliceB
     797        1846 :                 call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transHerm)
     798     1381632 :                 if (.not. renabled) matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) = matmul(alpha_def * sliceA, matD) + beta_def * sliceO
     799        3692 :                 call setMatMulAdd(sliceA, sliceB, hermitian, lowDia, sliceO, alpha, beta)
     800             :             end associate
     801        1846 :             call reportSHMM(__LINE__, simplified, classB = "hermitian", subsetB = "lowDia", classA = "matrix", subsetA = "uppLowDia")
     802        1846 :             if (.not. simplified) then ! contiguous
     803       95710 :                 matO = matC
     804        1300 :                 call setMatMulAdd(matA, matB, hermitian, lowDia, matO, alpha, beta, nrow, ncol, roffA, coffA, roffB, coffB, roffC, coffC)
     805        1300 :                 call reportSHMM(__LINE__, simplified, classB = "hermitian", subsetB = "lowDia", classA = "matrix", subsetA = "uppLowDia")
     806             :             end if
     807        1846 :         end subroutine
     808             : 
     809       25168 :         subroutine reportSHMM(line, simplified, classA, subsetA, classB, subsetB)
     810             :             integer, intent(in) :: line
     811             :             character(*), intent(in) :: classA, subsetA, classB, subsetB
     812             :             logical(LK) , intent(in) :: simplified
     813             :             logical(LK), allocatable :: tolerable(:,:)
     814       25168 :             type(display_type) :: disp
     815             : #if         IK_ENABLED
     816      721912 :             tolerable = matR == matO
     817             : #elif       CK_ENABLED || RK_ENABLED
     818     2248576 :             tolerable = isClose(matR, matO, abstol = TOL)
     819             : #else
     820             : #error      "Unrecognized interface."
     821             : #endif
     822     1836536 :             assertion = assertion .and. all(tolerable)
     823       25168 :             if (test%traceable .and. .not. assertion) then
     824             :                 ! LCOV_EXCL_START
     825             :                 call disp%show("SHMM")
     826             :                 call disp%show("renabled")
     827             :                 call disp%show( renabled )
     828             :                 call disp%show("simplified")
     829             :                 call disp%show( simplified )
     830             :                 call disp%show("classA")
     831             :                 call disp%show( classA )
     832             :                 call disp%show("subsetA")
     833             :                 call disp%show( subsetA )
     834             :                 call disp%show("classB")
     835             :                 call disp%show( classB )
     836             :                 call disp%show("subsetB")
     837             :                 call disp%show( subsetB )
     838             :                 call disp%show("shape(matA)")
     839             :                 call disp%show( shape(matA) )
     840             :                 call disp%show("shape(matB)")
     841             :                 call disp%show( shape(matB) )
     842             :                 call disp%show("shape(matC)")
     843             :                 call disp%show( shape(matC) )
     844             :                 call disp%show("[nrow, ncol]")
     845             :                 call disp%show( [nrow, ncol] )
     846             :                 call disp%show("[roffA, coffA, roffB, coffB, roffC, coffC]")
     847             :                 call disp%show( [roffA, coffA, roffB, coffB, roffC, coffC] )
     848             :                 call disp%show("[alpha, beta]")
     849             :                 call disp%show( [alpha, beta] )
     850             :                 call disp%show("matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow)")
     851             :                 call disp%show( matA(roffA + 1 : roffA + nrow, coffA + 1 : coffA + nrow) )
     852             :                 call disp%show("matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol)")
     853             :                 call disp%show( matB(roffB + 1 : roffB + nrow, coffB + 1 : coffB + ncol) )
     854             :                 call disp%show("matC(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     855             :                 call disp%show( matC(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     856             :                 call disp%show("matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     857             :                 call disp%show( matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     858             :                 call disp%show("matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     859             :                 call disp%show( matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     860             :                 call disp%show("matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) - matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol)")
     861             :                 call disp%show( matR(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) - matO(roffC + 1 : roffC + nrow, coffC + 1 : coffC + ncol) )
     862             :                 call disp%show("tolerable")
     863             :                 call disp%show( tolerable )
     864             :                 call disp%show("pack(matO, .not. tolerable)")
     865             :                 call disp%show( pack(matO, .not. tolerable) )
     866             :                 call disp%show("pack(matR, .not. tolerable)")
     867             :                 call disp%show( pack(matR, .not. tolerable) )
     868             :                 call disp%show("pack(matR - matO, .not. tolerable)")
     869             :                 call disp%show( pack(matR - matO, .not. tolerable) )
     870             :                 call disp%show("TOL")
     871             :                 call disp%show( TOL )
     872             :                 ! LCOV_EXCL_STOP
     873             :             end if
     874       25168 :             call test%assert(assertion, SK_"SHMM test with the above specifications must successfully pass.", int(line, IK))
     875       25168 :         end subroutine
     876             : 
     877             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     878             : 
     879        1824 :         subroutine testXPMV(alpha, beta)
     880             :             TYPE_KIND, intent(in), optional :: alpha, beta
     881             :             TYPE_KIND :: alpha_def, beta_def
     882             :             logical(LK) :: simplified
     883         832 :             alpha_def = 1; beta_def = 1
     884        1824 :             if (present(beta)) beta_def = beta
     885        1824 :             if (present(alpha)) alpha_def = alpha
     886        1824 :             simplified = .not. (present(alpha) .and. present(beta))
     887             :             ! symmetric uppDia
     888       19733 :             vecO = vecC
     889       19733 :             vecR = vecC ! this initial assignment is important.
     890        1824 :             call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transSymm)
     891             :             ! \bug
     892             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     893             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     894             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     895             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     896        1824 :             if (renabled) then
     897        1455 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     898             :             else
     899      201529 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     900             :             end if
     901       75170 :             vecA = getMatCopy(lfpack, matD, rdpack, uppDia)
     902       28644 :             call setMatMulAdd(vecA, symmetric, uppDia, lfpack, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     903        1824 :             call reportXPMV(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia")
     904        1824 :             if (.not. simplified) then ! contiguous
     905       14144 :                 vecO = vecC
     906        1300 :                 call setMatMulAdd(vecA, symmetric, uppDia, lfpack, vecB, vecO, alpha, beta, ndim, incB, incC)
     907        1300 :                 call reportXPMV(__LINE__, simplified, classA = "symmetric", subsetA = "uppDia")
     908             :             end if
     909             :             ! symmetric lowDia
     910       19733 :             vecO = vecC
     911       19733 :             vecR = vecC ! this initial assignment is important.
     912        1824 :             call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transSymm)
     913             :             ! \bug
     914             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     915             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     916             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     917             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     918        1824 :             if (renabled) then
     919        1455 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     920             :             else
     921      201529 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     922             :             end if
     923       75170 :             vecA = getMatCopy(lfpack, matD, rdpack, lowDia)
     924       28644 :             call setMatMulAdd(vecA, symmetric, lowDia, lfpack, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     925        1824 :             call reportXPMV(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia")
     926        1824 :             if (.not. simplified) then ! contiguous
     927       14144 :                 vecO = vecC
     928        1300 :                 call setMatMulAdd(vecA, symmetric, lowDia, lfpack, vecB, vecO, alpha, beta, ndim, incB, incC)
     929        1300 :                 call reportXPMV(__LINE__, simplified, classA = "symmetric", subsetA = "lowDia")
     930             :             end if
     931             :             ! hermitian uppDia
     932       19733 :             vecO = vecC
     933       19733 :             vecR = vecC ! this initial assignment is important.
     934        1824 :             call setMatCopy(matD, rdpack, matD, rdpack, uppDia, transHerm)
     935             :             ! \bug
     936             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     937             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     938             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     939             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     940        1824 :             if (renabled) then
     941        1455 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     942             :             else
     943      201529 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     944             :             end if
     945       75170 :             vecA = getMatCopy(lfpack, matD, rdpack, uppDia)
     946       28644 :             call setMatMulAdd(vecA, hermitian, uppDia, lfpack, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     947        1824 :             call reportXPMV(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia")
     948        1824 :             if (.not. simplified) then ! contiguous
     949       14144 :                 vecO = vecC
     950        1300 :                 call setMatMulAdd(vecA, hermitian, uppDia, lfpack, vecB, vecO, alpha, beta, ndim, incB, incC)
     951        1300 :                 call reportXPMV(__LINE__, simplified, classA = "hermitian", subsetA = "uppDia")
     952             :             end if
     953             :             ! hermitian lowDia
     954       19733 :             vecO = vecC
     955       19733 :             vecR = vecC ! this initial assignment is important.
     956        1824 :             call setMatCopy(matD, rdpack, matD, rdpack, lowDia, transHerm)
     957             :             ! \bug
     958             :             ! gfortran 13.2 yields incorrect results in the last element of the output vecR in the expression below. 
     959             :             ! This happens in release mode (with heap memory in serial shared library) but not in debug mode, indicating that aggressive optimization has something to do with this potential bug.
     960             :             ! For now, it seems like converting the `merge()` to an if block resolves the error.
     961             :             !vecR(begC:endC:incC) = merge(vecO(begC:endC:incC), matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC), renabled)
     962        1824 :             if (renabled) then
     963        1455 :                 vecR(begC:endC:incC) = vecO(begC:endC:incC)
     964             :             else
     965      201529 :                 vecR(begC:endC:incC) = matmul(alpha_def * matD, vecB(begB:endB:incB)) + beta_def * vecO(begC:endC:incC)
     966             :             end if
     967       75170 :             vecA = getMatCopy(lfpack, matD, rdpack, lowDia)
     968       28644 :             call setMatMulAdd(vecA, hermitian, lowDia, lfpack, vecB(begB:endB:incB), vecO(begC:endC:incC), alpha, beta)
     969        1824 :             call reportXPMV(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia")
     970        1824 :             if (.not. simplified) then ! contiguous
     971       14144 :                 vecO = vecC
     972        1300 :                 call setMatMulAdd(vecA, hermitian, lowDia, lfpack, vecB, vecO, alpha, beta, ndim, incB, incC)
     973        1300 :                 call reportXPMV(__LINE__, simplified, classA = "hermitian", subsetA = "lowDia")
     974             :             end if
     975        1824 :         end subroutine
     976             : 
     977       12496 :         subroutine reportXPMV(line, simplified, classA, subsetA)
     978             :             character(*), intent(in) :: classA, subsetA
     979             :             logical(LK) , intent(in) :: simplified
     980             :             integer, intent(in) :: line
     981             :             logical(LK), allocatable :: tolerable(:)
     982       12496 :             type(display_type) :: disp
     983             : #if         IK_ENABLED
     984       50044 :             tolerable = vecR == vecO
     985             : #elif       CK_ENABLED || RK_ENABLED
     986      155552 :             tolerable = isClose(vecR, vecO, abstol = TOL)
     987             : #else
     988             : #error      "Unrecognized interface."
     989             : #endif
     990      123012 :             assertion = assertion .and. all(tolerable)
     991       12496 :             if (test%traceable .and. .not. assertion) then
     992             :                 ! LCOV_EXCL_START
     993             :                 call disp%show("XPMV")
     994             :                 call disp%show("renabled")
     995             :                 call disp%show( renabled )
     996             :                 call disp%show("simplified")
     997             :                 call disp%show( simplified )
     998             :                 call disp%show("classA")
     999             :                 call disp%show( classA )
    1000             :                 call disp%show("subsetA")
    1001             :                 call disp%show( subsetA )
    1002             :                 call disp%show("shape(vecA)")
    1003             :                 call disp%show( shape(vecA) )
    1004             :                 call disp%show("shape(vecB)")
    1005             :                 call disp%show( shape(vecB) )
    1006             :                 call disp%show("shape(vecC)")
    1007             :                 call disp%show( shape(vecC) )
    1008             :                 call disp%show("[ndim, incB, incC]")
    1009             :                 call disp%show( [ndim, incB, incC] )
    1010             :                 call disp%show("[alpha, beta]")
    1011             :                 call disp%show( [alpha, beta] )
    1012             :                 call disp%show("matD")
    1013             :                 call disp%show( matD )
    1014             :                 call disp%show("vecA")
    1015             :                 call disp%show( vecA )
    1016             :                 call disp%show("vecB(begB:endB:incB)")
    1017             :                 call disp%show( vecB(begB:endB:incB) )
    1018             :                 call disp%show("vecC(begC:endC:incC)")
    1019             :                 call disp%show( vecC(begC:endC:incC) )
    1020             :                 call disp%show("vecO(begC:endC:incC)")
    1021             :                 call disp%show( vecO(begC:endC:incC) )
    1022             :                 call disp%show("vecR(begC:endC:incC)")
    1023             :                 call disp%show( vecR(begC:endC:incC) )
    1024             :                 call disp%show("vecR(begC:endC:incC) - vecO(begC:endC:incC)")
    1025             :                 call disp%show( vecR(begC:endC:incC) - vecO(begC:endC:incC) )
    1026             :                 call disp%show("tolerable")
    1027             :                 call disp%show( tolerable )
    1028             :                 call disp%show("pack(vecO, .not. tolerable)")
    1029             :                 call disp%show( pack(vecO, .not. tolerable) )
    1030             :                 call disp%show("pack(vecR, .not. tolerable)")
    1031             :                 call disp%show( pack(vecR, .not. tolerable) )
    1032             :                 call disp%show("pack(vecR - vecO, .not. tolerable)")
    1033             :                 call disp%show( pack(vecR - vecO, .not. tolerable) )
    1034             :                 call disp%show("TOL")
    1035             :                 call disp%show( TOL )
    1036             :                 ! LCOV_EXCL_STOP
    1037             :             end if
    1038       12496 :             call test%assert(assertion, SK_"XPMV test with the above specifications must successfully pass.", int(line, IK))
    1039       12496 :         end subroutine
    1040             : 
    1041             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    1042             : 
    1043             : #undef  TYPE_KIND
    1044             : #undef  GET_CONJG

ParaMonte: Parallel Monte Carlo and Machine Learning Library 
The Computational Data Science Lab
© Copyright 2012 - 2024