https://www.cdslab.org/paramonte/fortran/2
Current view: top level - main - pm_matrixMulAdd@routines.inc.F90 (source / functions) Hit Total Coverage
Test: ParaMonte 2.0.0 :: Serial Fortran - Code Coverage Report Lines: 312 312 100.0 %
Date: 2024-04-08 03:18:57 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       2             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       3             : !!!!                                                                                                                            !!!!
       4             : !!!!    ParaMonte: Parallel Monte Carlo and Machine Learning Library.                                                           !!!!
       5             : !!!!                                                                                                                            !!!!
       6             : !!!!    Copyright (C) 2012-present, The Computational Data Science Lab                                                          !!!!
       7             : !!!!                                                                                                                            !!!!
       8             : !!!!    This file is part of the ParaMonte library.                                                                             !!!!
       9             : !!!!                                                                                                                            !!!!
      10             : !!!!    LICENSE                                                                                                                 !!!!
      11             : !!!!                                                                                                                            !!!!
      12             : !!!!       https://github.com/cdslaborg/paramonte/blob/main/LICENSE.md                                                          !!!!
      13             : !!!!                                                                                                                            !!!!
      14             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      15             : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
      16             : 
      17             : !>  \brief
      18             : !>  This include file contains procedure implementation of the generic interfaces of [pm_matrixMulAdd](@ref pm_matrixMulAdd).
      19             : !>
      20             : !>  \finmain
      21             : !>
      22             : !>  \author
      23             : !>  \AmirShahmoradi, Apr 21, 2017, 1:54 AM, Institute for Computational Engineering and Sciences (ICES), The University of Texas at Austin
      24             : 
      25             : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      26             : 
      27             :         ! Define the default optional constants.
      28             : #if     ASS_ENABLED && (gemv_ENABLED || spmv_ENABLED || hpmv_ENABLED || symm_ENABLED || symv_ENABLED || hemv_ENABLED || hemm_ENABLED || gemm_ENABLED)
      29             : #define BETA beta_def
      30             : #define ALPHA alpha_def
      31             : #define DECLARE_SET_DEFAULT_ALPHA_BETA \
      32             : TYPE_KIND :: alpha_def, beta_def; \
      33             : beta_def = ONE; alpha_def = ONE; \
      34             : if (present(beta)) beta_def = beta; \
      35             : if (present(alpha)) alpha_def = alpha;
      36             : #elif   !EXP_ENABLED
      37             : #error  "Unrecognized interface."
      38             : #endif
      39             :         ! Define the transposition of `matA` for BLAS gemv/gemm call routines.
      40             : #if     TNA_ENABLED
      41             : #define TMA "N"
      42             : #elif   TSA_ENABLED
      43             : #define TMA "T"
      44             : #elif   THA_ENABLED
      45             : #define TMA "C"
      46             : #endif
      47             :         ! Define the transposition of `matB` for BLAS gemm call routines.
      48             : #if     TNB_ENABLED
      49             : #define TMB "N"
      50             : #elif   TSB_ENABLED
      51             : #define TMB "T"
      52             : #elif   THB_ENABLED
      53             : #define TMB "C"
      54             : #endif
      55             :         ! Define the call interface for BLAS symm/hemm call routines.
      56             : #if     CHA_ENABLED && CHB_ENABLED
      57             : #define SHMM blasHEMM
      58             : #elif   CSA_ENABLED || CNA_ENABLED || CSB_ENABLED || CNB_ENABLED
      59             : #define SHMM blasSYMM
      60             : #endif
      61             :         ! Define blas gemm call interface.
      62             : #define GEMM(transA, tranB, alpha, beta) \
      63             : blasGEMM(transA, tranB, nrow, ncol, ndum, alpha, matA(1, 1), size(matA, 1, IK), matB(1, 1), size(matB, 1, IK), beta, matC(1, 1), size(matC, 1, IK))
      64             :         ! Define matA transpose form.
      65             : #if     CK_ENABLED && (CHA_ENABLED || THA_ENABLED)
      66             : #define CONJG_A(X) conjg(X)
      67             : #elif   THA_ENABLED || TSA_ENABLED || TNA_ENABLED || CHA_ENABLED || CSA_ENABLED || CNA_ENABLED
      68             : #define CONJG_A(X) X
      69             : #else
      70             : #error  "Unrecognized interface."
      71             : #endif
      72             :         ! Define matB transpose form.
      73             : #if     CK_ENABLED && (CHB_ENABLED || THB_ENABLED)
      74             : #define CONJG_B(X) conjg(X)
      75             : #elif   THB_ENABLED || TSB_ENABLED || TNB_ENABLED || CHB_ENABLED || CSB_ENABLED || CNB_ENABLED
      76             : #define CONJG_B(X) X
      77             : #else
      78             : #error  "Unrecognized interface."
      79             : #endif
      80             :         ! Define the `real` component function.
      81             : #if     CK_ENABLED && (THA_ENABLED || THB_ENABLED || CHA_ENABLED || CHB_ENABLED)
      82             : #define GET_RE(X) X%re
      83             : #elif   THA_ENABLED || TSA_ENABLED || TNA_ENABLED || THB_ENABLED || TSB_ENABLED || TNB_ENABLED || \
      84             :         CHA_ENABLED || CSA_ENABLED || CNA_ENABLED || CHB_ENABLED || CSB_ENABLED || CNB_ENABLED
      85             : #define GET_RE(X) X
      86             : #else
      87             : #error  "Unrecognized interface."
      88             : #endif
      89             :         ! Define the additional temporary variable for triangular `matA`.
      90             : !#if     SLA_ENABLED || SUA_ENABLED || D2_D1_ENABLED || D1_D1_ENABLED
      91             : !#define ndum nrow
      92             : !#elif   SLB_ENABLED || SUB_ENABLED
      93             : !#define ndum ncol
      94             : !#elif   SFA_ENABLED && SFB_ENABLED
      95             : !#else
      96             : !#error  "Unrecognized interface."
      97             : !#endif
      98             :         ! Declare constants and temporary variables.
      99             : #if     IK_ENABLED
     100             : #define TYPE_KIND integer(IKC)
     101             :         integer(IKC), parameter :: ZERO = 0_IKC, ONE = 1_IKC
     102             : #elif   CK_ENABLED
     103             : #define TYPE_KIND complex(CKC)
     104             :         complex(CKC), parameter :: ZERO = (0._CKC, 0._CKC), ONE = (1._CKC, 0._CKC)
     105             : #elif   RK_ENABLED
     106             : #define TYPE_KIND real(RKC)
     107             :         real(RKC)   , parameter :: ZERO = 0._RKC, ONE = 1._RKC
     108             : #else
     109             : #error  "Unrecognized interface."
     110             : #endif
     111             : 
     112             : !        !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     113             : !#if     setMatMulAdd_ENABLED && axpby_ENABLED
     114             : !        !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     115             : !
     116             : !        integer(IK) :: nell, iell, bell, cell
     117             : !        if (matA == ZERO) return
     118             : !        nell =
     119             : !#if     ASS_ENABLED
     120             : !        ! Code for both increments equal to 1
     121             : !        do iell = 1, nell
     122             : !            matC(iell) = matA * matB(iell) + matC(iell)
     123             : !        end do
     124             : !#elif   EXP_ENABLED
     125             : !        if (incB == 1 .and. incC == 1) then
     126             : !        else ! code for unequal increments or equal increments not equal to 1.
     127             : !            bell = 1
     128             : !            cell = 1
     129             : !            if (incB < 0) bell = (1 - nell) * incB + 1
     130             : !            if (incC < 0) cell = (1 - nell) * incC + 1
     131             : !            do iell = 1, nell
     132             : !                matC(cell) = matC(cell) + matA * matB(bell)
     133             : !                bell = bell + incB
     134             : !                cell = cell + incC
     135             : !            end do
     136             : !        end if
     137             : !#endif
     138             : 
     139             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     140             : #if     setMatMulAdd_ENABLED && gemv_ENABLED
     141             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     142             : 
     143             :         ! Define the implied shapes and perform bound checks.
     144             : #if     !(BLAS_ENABLED && DISPATCH_ENABLED)
     145             :         integer(IK) :: irow, icol, iell, bstart, cstart
     146             : #endif
     147             : #if     ASS_ENABLED
     148             :         integer(IK) :: nrow, ncol
     149             :         integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
     150        5454 :         DECLARE_SET_DEFAULT_ALPHA_BETA
     151        5454 :         nrow = size(matA, 1, IK)
     152        5454 :         ncol = size(matA, 2, IK)
     153             : #elif   EXP_ENABLED
     154             :         ! Ensure offsets and subset bounds are logical.
     155       11736 :         CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol]), \
     156             :         SK_"@setMatMulAdd(): The condition `all(0_IK <= [nrow, ncol])` must hold. nrow, ncol = "//getStr([nrow, ncol])) ! fpp
     157       19560 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
     158             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
     159             :         ! Ensure subset of `matA` does not overflow the matrix bounds.
     160       43032 :         CHECK_ASSERTION(__LINE__, all([roffA + nrow, coffA + ncol] <= shape(matA, kind = IK)), \
     161             :         SK_"@setMatMulAdd(): The condition `all([roffA + nrow, coffA + ncol] <= shape(matA))` must hold. roffA, coffA, nrow, ncol, shape(matA) = "\
     162             :         //getStr([roffA, coffA, nrow, ncol, shape(matA, IK)])) ! fpp
     163             : #else
     164             : #error  "Unrecognized interface."
     165             : #endif
     166             :         ! Check bounds.
     167             : #if     TNA_ENABLED
     168             : #define LENB ncol
     169             : #define LENC nrow
     170       21896 :         CHECK_ASSERTION(__LINE__, all([ncol, nrow] == [size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)]), \
     171             :         SK_"@setMatMulAdd(): The condition `all([nrow, ncol] == [size(matC(::abs(incC))), size(matB(::abs(incB)))])` must hold. [nrow, ncol], size(matC(::abs(incC))), size(matB(::abs(incB))) = "\
     172             :         //getStr([[nrow, ncol], size(matC(::abs(incC)), 1, IK), size(matB(::abs(incB)), 1, IK)])) ! fpp
     173             : #elif   TSA_ENABLED || THA_ENABLED
     174             : #define LENB nrow
     175             : #define LENC ncol
     176       68618 :         CHECK_ASSERTION(__LINE__, all([nrow, ncol] == [size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)]), \
     177             :         SK_"@setMatMulAdd(): The condition `all([nrow, ncol] == [size(matB(::abs(incB))), size(matC(::abs(incC)))])` must hold. [nrow, ncol], size(matB(::abs(incB))), size(matC(::abs(incC))) = "\
     178             :         //getStr([shape(matA, IK), [nrow, ncol], size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)])) ! fpp
     179             : #endif
     180             :         ! Quick return if possible.
     181        9366 :         if (nrow == 0_IK .or. ncol == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
     182             : #if     BLAS_ENABLED && DISPATCH_ENABLED
     183             :         call blasGEMV(TMA, nrow, ncol, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
     184             : #else
     185             :         ! Set `lenB` and `lenC`, the lengths of the vectors `matB` and `matC`, and set up the start points in `matB` and `matC`.
     186        2814 :         if (0_IK < incB) then
     187             :             bstart = 1
     188             :         else
     189        1380 :             bstart = 1 - (LENB - 1) * incB
     190             :         end if
     191        2814 :         if (0_IK < incC) then
     192             :             cstart = 1
     193             :         else
     194        1374 :             cstart = 1 - (LENC - 1) * incC
     195             :         end if
     196             :         ! Start the operations. in this version the elements of matA are accessed sequentially with one pass through matA.
     197             :         ! First form  matC := beta * matC.
     198        6651 :         if (BETA /= ONE) then
     199        2134 :             if (incC == 1_IK) then
     200        2906 :                 if (BETA == ZERO) then
     201        9179 :                     matC(1 : LENC) = ZERO
     202             :                 else
     203             :                     do concurrent(irow = 1 : LENC)
     204        9762 :                         matC(irow) = BETA * matC(irow)
     205             :                     end do
     206             :                 end if
     207             :             else
     208             :                 iell = cstart
     209        1731 :                 if (BETA == ZERO) then
     210        5394 :                     do irow = 1, LENC
     211        4542 :                         matC(iell) = ZERO
     212        5394 :                         iell = iell + incC
     213             :                     end do
     214             :                 else
     215        5688 :                     do irow = 1, LENC
     216        4809 :                         matC(iell) = BETA * matC(iell)
     217        5688 :                         iell = iell + incC
     218             :                     end do
     219             :                 end if
     220             :             end if
     221             :         end if
     222        6651 :         if (ALPHA == ZERO) return
     223             : #if     TNA_ENABLED
     224             :         ! Form matC := alpha * matA * matB + matC.
     225             :         block
     226             :             integer(IK) :: cell
     227             :             TYPE_KIND :: temp
     228             :             iell = bstart
     229         711 :             if (incC == 1_IK) then
     230        7535 :                 do icol = 1, ncol
     231        6373 :                     temp = ALPHA * matB(iell)
     232       42511 :                     do irow = 1, nrow
     233       42511 :                         matC(irow) = matC(irow) + temp * matA(irow, icol)
     234             :                     end do
     235        7535 :                     iell = iell + incB
     236             :                 end do
     237             :             else
     238        3934 :                 do icol = 1, ncol
     239        3333 :                     temp = ALPHA * matB(iell)
     240             :                     cell = cstart
     241       21903 :                     do irow = 1, nrow
     242       18570 :                         matC(cell) = matC(cell) + temp * matA(irow, icol)
     243       21903 :                         cell = cell + incC
     244             :                     end do
     245        3934 :                     iell = iell + incB
     246             :                 end do
     247             :             end if
     248             :         end block
     249             : #elif   TSA_ENABLED || THA_ENABLED
     250             :         ! Form matC := alpha * matA**T * matB + matC or matC := alpha * matA**H * matB + matC.
     251             :         block
     252             :             TYPE_KIND :: temp
     253             :             iell = cstart
     254        1413 :             if (incB == 1_IK) then
     255       15476 :                 do icol = 1, ncol
     256        5960 :                     temp = ZERO
     257       85116 :                     do irow = 1, nrow
     258       85116 :                         temp = temp + CONJG_A(matA(irow, icol)) * matB(irow)
     259             :                     end do
     260       13154 :                     matC(iell) = matC(iell) + ALPHA * temp
     261       15476 :                     iell = iell + incC
     262             :                 end do
     263             :             else
     264        7866 :                 do icol = 1, ncol
     265        3194 :                     temp = ZERO
     266             :                     cstart = bstart
     267       43918 :                     do irow = 1, nrow
     268       37238 :                         temp = temp + CONJG_A(matA(irow, icol)) * matB(cstart)
     269       43918 :                         cstart = cstart + incB
     270             :                     end do
     271        6680 :                     matC(iell) = matC(iell) + ALPHA * temp
     272        7866 :                     iell = iell + incC
     273             :                 end do
     274             :             end if
     275             :         end block
     276             : #else
     277             : #error  "Unrecognized interface."
     278             : #endif
     279             : #endif
     280             : 
     281             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     282             : #elif   setMatMulAdd_ENABLED && (spmv_ENABLED || hpmv_ENABLED)
     283             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     284             : 
     285             : #if     spmv_ENABLED
     286             : #define PMV blasSPMV
     287             : #elif   hpmv_ENABLED
     288             : #define PMV blasHPMV
     289             : #else
     290             : #error  "Unrecognized interface."
     291             : #endif
     292             : #if     !(BLAS_ENABLED && DISPATCH_ENABLED)
     293             :         integer(IK) :: ib, ic, jb, jc, bstart, cstart, irow, icol, idum, jdum
     294             : #endif
     295             :         ! Define the implied shapes and perform bound checks.
     296             : #if     ASS_ENABLED
     297             :         integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
     298             :         integer(IK) :: ndim
     299        7296 :         DECLARE_SET_DEFAULT_ALPHA_BETA
     300        7296 :         ndim = size(matB, 1, IK)
     301       21888 :         CHECK_ASSERTION(__LINE__, ndim == size(matC, kind = IK), \
     302             :         SK_"@setMatMulAdd(): The condition `size(matB) == size(matC)` must hold. size(matB), size(matC) = "\
     303             :         //getStr([ndim, size(matC, kind = IK)])) ! fpp
     304             : #elif   EXP_ENABLED
     305        5200 :         CHECK_ASSERTION(__LINE__, 0_IK <= ndim, \
     306             :         SK_"@setMatMulAdd(): The condition `0 <= ndim` must hold. ndim = "//getStr(ndim)) ! fpp
     307       15600 :         CHECK_ASSERTION(__LINE__, all(0_IK /= [incB, incC]), \
     308             :         SK_"@setMatMulAdd(): The condition `all(0 /= [incB, incC])` must hold. ndim, roffA, coffA, incB, incC = "//\
     309             :         getStr([incB, incC])) ! fpp
     310       15600 :         CHECK_ASSERTION(__LINE__, abs(incB) * (ndim - 1) < size(matB, kind = IK), \
     311             :         SK_"@setMatMulAdd(): The condition `abs(incB) * (ndim - 1) < size(matB(:))` must hold. incB, size(matB) = "//\
     312             :         getStr([incB, size(matB, kind = IK)])) ! fpp
     313       15600 :         CHECK_ASSERTION(__LINE__, abs(incC) * (ndim - 1) < size(matC, kind = IK), \
     314             :         SK_"@setMatMulAdd(): The condition `abs(incC) * (ndim - 1) < size(matC(:))` must hold. incC, size(matC) = "//\
     315             :         getStr([incC, size(matC, kind = IK)])) ! fpp
     316             : #else
     317             : #error  "Unrecognized interface."
     318             : #endif
     319       37488 :         CHECK_ASSERTION(__LINE__, ndim * (ndim + 1_IK) / 2_IK == size(matA, kind = IK), \
     320             :         SK_"@setMatMulAdd(): The condition `ndim * (ndim + 1) / 2 == size(matA)` must hold. ndim, size(matA) = "\
     321             :         //getStr([ndim, size(matA, kind = IK)])) ! fpp
     322             :         ! Quick return if possible.
     323       12496 :         if (ndim == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
     324             : #if     BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SUA_ENABLED
     325             :         call PMV("U", ndim, ALPHA, matA(1), matB(1), incB, BETA, matC(1), incC)
     326             : #elif   BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SLA_ENABLED
     327             :         call PMV("L", ndim, ALPHA, matA(1), matB(1), incB, BETA, matC(1), incC)
     328             : #else
     329             :         ! Set up the start points in matC.
     330        4124 :         if (incC > 0_IK) then
     331             :             cstart = 1_IK
     332             :         else
     333        2052 :             cstart = 1_IK - (ndim - 1_IK) * incC
     334             :         end if
     335             :         ! Start the operations. In this version the elements of the array matA
     336             :         ! are accessed sequentially with ONE pass through matA.
     337             :         ! First Form  matC := BETA * matC.
     338        9828 :         if (BETA /= ONE) then
     339        3140 :             if (incC == 1_IK) then
     340        4224 :                 if (BETA == ZERO) then
     341       14112 :                     do irow = 1_IK, ndim
     342       14112 :                         matC(irow) = ZERO
     343             :                     end do
     344             :                 else
     345       13904 :                     do irow = 1_IK, ndim
     346       13904 :                         matC(irow) = BETA * matC(irow)
     347             :                     end do
     348             :                 end if
     349             :             else
     350             :                 ic = cstart
     351        2640 :                 if (BETA == ZERO) then
     352        8652 :                     do irow = 1_IK, ndim
     353        7352 :                         matC(ic) = ZERO
     354        8652 :                         ic = ic + incC
     355             :                     end do
     356             :                 else
     357        8712 :                     do irow = 1_IK, ndim
     358        7372 :                         matC(ic) = BETA * matC(ic)
     359        8712 :                         ic = ic + incC
     360             :                     end do
     361             :                 end if
     362             :             end if
     363             :         end if
     364        9828 :         if (ALPHA == ZERO) return
     365             :         ! Set up the start points in matB.
     366        3096 :         if (incB > 0_IK) then
     367             :             bstart = 1_IK
     368             :         else
     369        1516 :             bstart = 1_IK - (ndim - 1_IK) * incB
     370             :         end if
     371             :         jdum = 1_IK
     372             : #if     SUA_ENABLED
     373             :         ! Form matC when matA contains the upper triangle.
     374             :         block
     375             :             TYPE_KIND :: temp, dumm
     376        1548 :             if (incB == 1_IK .and. incC == 1_IK) then
     377       16768 :                 do icol = 1_IK, ndim
     378        7828 :                     temp = ALPHA * matB(icol)
     379        6352 :                     dumm = ZERO
     380             :                     idum = jdum
     381       56822 :                     do irow = 1_IK,icol - 1_IK
     382       42642 :                         matC(irow) = matC(irow) + temp * matA(idum)
     383       42642 :                         dumm = dumm + CONJG_A(matA(idum)) * matB(irow)
     384       56822 :                         idum = idum + 1_IK
     385             :                     end do
     386       14180 :                     matC(icol) = matC(icol) + temp * GET_RE(matA(jdum + icol - 1)) + ALPHA * dumm
     387        2588 :                     jdum = jdum + icol
     388             :                 end do
     389             :             else
     390             :                 jb = bstart
     391             :                 jc = cstart
     392        8450 :                 do icol = 1_IK, ndim
     393        7152 :                     temp = ALPHA * matB(jb)
     394        3308 :                     dumm = ZERO
     395             :                     ib = bstart
     396             :                     ic = cstart
     397       28796 :                     do idum = jdum, jdum + icol - 2
     398       21644 :                         matC(ic) = matC(ic) + temp * matA(idum)
     399       21644 :                         dumm = dumm + CONJG_A(matA(idum)) * matB(ib)
     400       21644 :                         ib = ib + incB
     401       28796 :                         ic = ic + incC
     402             :                     end do
     403        7152 :                     matC(jc) = matC(jc) + temp * GET_RE(matA(jdum + icol - 1)) + ALPHA * dumm
     404        7152 :                     jb = jb + incB
     405        7152 :                     jc = jc + incC
     406        1298 :                     jdum = jdum + icol
     407             :                 end do
     408             :             end if
     409             :         end block
     410             : #elif   SLA_ENABLED
     411             :         ! Form matC when matA contains the lower triangle.
     412             :         block
     413             :             TYPE_KIND :: temp, dumm
     414        1548 :             if (incB == 1_IK .and. incC == 1_IK) then
     415       16768 :                 do icol = 1_IK, ndim
     416       14180 :                     temp = ALPHA * matB(icol)
     417        6352 :                     dumm = ZERO
     418       14180 :                     matC(icol) = matC(icol) + temp * GET_RE(matA(jdum))
     419       14180 :                     idum = jdum + 1_IK
     420       56822 :                     do irow = icol + 1_IK, ndim
     421       42642 :                         matC(irow) = matC(irow) + temp * matA(idum)
     422       42642 :                         dumm = dumm + CONJG_A(matA(idum)) * matB(irow)
     423       56822 :                         idum = idum + 1_IK
     424             :                     end do
     425       14180 :                     matC(icol) = matC(icol) + ALPHA * dumm
     426       16768 :                     jdum = jdum + (ndim - icol + 1)
     427             :                 end do
     428             :             else
     429             :                 jb = bstart
     430             :                 jc = cstart
     431        8450 :                 do icol = 1_IK, ndim
     432        7152 :                     temp = ALPHA * matB(jb)
     433        3308 :                     dumm = ZERO
     434        7152 :                     matC(jc) = matC(jc) + temp * GET_RE(matA(jdum))
     435             :                     ib = jb
     436             :                     ic = jc
     437       28796 :                     do idum = jdum + 1_IK, jdum + ndim - icol
     438       21644 :                         ib = ib + incB
     439       21644 :                         ic = ic + incC
     440       21644 :                         matC(ic) = matC(ic) + temp * matA(idum)
     441       28796 :                         dumm = dumm + CONJG_A(matA(idum)) * matB(ib)
     442             :                     end do
     443        7152 :                     matC(jc) = matC(jc) + ALPHA * dumm
     444        7152 :                     jb = jb + incB
     445        7152 :                     jc = jc + incC
     446        8450 :                     jdum = jdum + (ndim - icol + 1)
     447             :                 end do
     448             :             end if
     449             :         end block
     450             : #else
     451             : #error  "Unrecognized interface."
     452             : #endif
     453             : #endif
     454             : 
     455             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     456             : #elif   setMatMulAdd_ENABLED && (symv_ENABLED || hemv_ENABLED)
     457             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     458             : 
     459             : #if     symv_ENABLED
     460             : #define PMV blasSYMV
     461             : #elif   hemv_ENABLED
     462             : #define PMV blasHEMV
     463             : #else
     464             : #error  "Unrecognized interface."
     465             : #endif
     466             : #if     !(BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && (SLA_ENABLED || SUA_ENABLED))
     467             :         integer(IK) :: ib, ic, jb, jc, bstart, cstart, irow, icol
     468             : #endif
     469             :         ! Define the implied shapes and perform bound checks.
     470             : #if     ASS_ENABLED
     471             :         integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
     472             :         integer(IK) :: ndim
     473        7320 :         DECLARE_SET_DEFAULT_ALPHA_BETA
     474        7320 :         ndim = size(matA, 1, IK)
     475       87840 :         CHECK_ASSERTION(__LINE__, all(ndim == [size(matA, 2, IK), shape(matB, IK), shape(matC, IK)]), \
     476             :         SK_"@setMatMulAdd(): The condition `all(ndim == [shape(matA(:,:)), shape(matB(:)), shape(matC(:))])` must hold. ndim, shape(matA), shape(matB), shape(matC) = "\
     477             :         //getStr([ndim, shape(matA, IK), shape(matB, IK), shape(matC, IK)])) ! fpp
     478             : #elif   EXP_ENABLED
     479       20816 :         CHECK_ASSERTION(__LINE__, all(0_IK <= [ndim, roffA, coffA]), \
     480             :         SK_"@setMatMulAdd(): The condition `all(0 <= [ndim, roffA, coffA])` must hold. ndim, roffA, coffA = "//\
     481             :         getStr([ndim, roffA, coffA])) ! fpp
     482       15612 :         CHECK_ASSERTION(__LINE__, all(0_IK /= [incB, incC]), \
     483             :         SK_"@setMatMulAdd(): The condition `all(0 /= [incB, incC])` must hold. ndim, roffA, coffA, incB, incC = "//\
     484             :         getStr([incB, incC])) ! fpp
     485       41632 :         CHECK_ASSERTION(__LINE__, all(ndim <= ubound(matA, kind = IK)), \
     486             :         SK_"@setMatMulAdd(): The condition `all(ndim + [roffA, coffA] <= shape(matA))` must hold. ndim, ubound(matA) = "//\
     487             :         getStr([ndim, ubound(matA, kind = IK)])) ! fpp
     488       15612 :         CHECK_ASSERTION(__LINE__, abs(incB) * (ndim - 1) < size(matB, kind = IK), \
     489             :         SK_"@setMatMulAdd(): The condition `abs(incB) * (ndim - 1) < size(matB(:))` must hold. incB, size(matB) = "//\
     490             :         getStr([incB, size(matB, kind = IK)])) ! fpp
     491       15612 :         CHECK_ASSERTION(__LINE__, abs(incC) * (ndim - 1) < size(matC, kind = IK), \
     492             :         SK_"@setMatMulAdd(): The condition `abs(incC) * (ndim - 1) < size(matC(:))` must hold. incC, size(matC) = "//\
     493             :         getStr([incC, size(matC, kind = IK)])) ! fpp
     494             : #else
     495             : #error  "Unrecognized interface."
     496             : #endif
     497       12524 :         if (ndim == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return ! Quick return if possible.
     498             : #if     BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SUA_ENABLED
     499             :         call PMV("U", ndim, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
     500             : #elif   BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SLA_ENABLED
     501             :         call PMV("L", ndim, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
     502             : #else
     503        4128 :         if(incC > 0_IK) then
     504             :             cstart = 1_IK
     505             :         else
     506        2052 :             cstart = 1_IK - (ndim - 1_IK) * incC
     507             :         end if
     508             :         ! Start the operations. In this version the elements of matA are
     509             :         ! accessed sequentially with ONE pass through the triangular part of matA.
     510             :         ! First form  matC := BETA * matC.
     511        9856 :         if(BETA /= ONE) then
     512        3142 :             if(incC == 1_IK) then
     513        4223 :                 if (BETA == ZERO) then
     514       14260 :                     do irow = 1_IK, ndim
     515       14260 :                         matC(irow) = ZERO
     516             :                     end do
     517             :                 else
     518       13520 :                     do irow = 1_IK, ndim
     519       13520 :                         matC(irow) = BETA * matC(irow)
     520             :                     end do
     521             :                 end if
     522             :             else
     523             :                 ic = cstart
     524        2641 :                 if(BETA == ZERO) then
     525        8656 :                     do irow = 1_IK, ndim
     526        7355 :                         matC(ic) = ZERO
     527        8656 :                         ic = ic + incC
     528             :                     end do
     529             :                 else
     530        8712 :                     do irow = 1_IK, ndim
     531        7372 :                         matC(ic) = BETA * matC(ic)
     532        8712 :                         ic = ic + incC
     533             :                     end do
     534             :                 end if
     535             :             end if
     536             :         end if
     537        9856 :         if(ALPHA == ZERO) return
     538             :         ! set up the start points in matB and matC.
     539        3100 :         if(incB > 0_IK) then
     540             :             bstart = 1_IK
     541             :         else
     542        1518 :             bstart = 1_IK - (ndim - 1_IK) * incB
     543             :         end if
     544             : #if     SUA_ENABLED
     545             :         ! Form  matC when matA is stored in upper triangle.
     546             :         block
     547             :             TYPE_KIND :: temp, dumm
     548        1550 :             if(incB == 1_IK .and. incC == 1_IK) then
     549       16682 :                 do icol = 1_IK, ndim
     550        7562 :                     temp = ALPHA * matB(icol)
     551        6520 :                     dumm = ZERO
     552       56360 :                     do irow = 1_IK, icol - 1_IK
     553       42278 :                         matC(irow) = matC(irow) + temp * matA(irow, icol)
     554       56360 :                         dumm = dumm + CONJG_A(matA(irow, icol)) * matB(irow)
     555             :                     end do
     556       16682 :                     matC(icol) = matC(icol) + temp * GET_RE(matA(icol, icol)) + ALPHA * dumm
     557             :                 end do
     558             :             else
     559             :                 jb = bstart
     560             :                 jc = cstart
     561        8458 :                 do icol = 1_IK, ndim
     562        3850 :                     temp = ALPHA * matB(jb)
     563        3308 :                     dumm = ZERO
     564             :                     ib = bstart
     565             :                     ic = cstart
     566       28808 :                     do irow = 1_IK, icol - 1_IK
     567       21650 :                         matC(ic) = matC(ic) + temp * matA(irow, icol)
     568       21650 :                         dumm = dumm + CONJG_A(matA(irow, icol)) * matB(ib)
     569       21650 :                         ib = ib + incB
     570       28808 :                         ic = ic + incC
     571             :                     end do
     572        7158 :                     matC(jc) = matC(jc) + temp * GET_RE(matA(icol, icol)) + ALPHA * dumm
     573        7158 :                     jb = jb + incB
     574        8458 :                     jc = jc + incC
     575             :                 end do
     576             :             end if
     577             :         end block
     578             : #elif   SLA_ENABLED
     579             :         ! Form matC when matA is stored in lower triangle.
     580             :         block
     581             :             TYPE_KIND :: temp, dumm
     582        1550 :             if (incB == 1_IK .and. incC == 1_IK) then
     583       16682 :                 do icol = 1_IK, ndim
     584       14082 :                     temp = ALPHA * matB(icol)
     585        6520 :                     dumm = ZERO
     586       14082 :                     matC(icol) = matC(icol) + temp * GET_RE(matA(icol, icol))
     587       56360 :                     do irow = icol + 1_IK, ndim
     588       42278 :                         matC(irow) = matC(irow) + temp * matA(irow, icol)
     589       56360 :                         dumm = dumm + CONJG_A(matA(irow, icol)) * matB(irow)
     590             :                     end do
     591       16682 :                     matC(icol) = matC(icol) + ALPHA * dumm
     592             :                 end do
     593             :             else
     594             :                 jb = bstart
     595             :                 jc = cstart
     596        8458 :                 do icol = 1_IK, ndim
     597        7158 :                     temp = ALPHA * matB(jb)
     598        3308 :                     dumm = ZERO
     599        7158 :                     matC(jc) = matC(jc) + temp * GET_RE(matA(icol, icol))
     600             :                     ib = jb
     601             :                     ic = jc
     602       28808 :                     do irow = icol + 1_IK, ndim
     603       21650 :                         ib = ib + incB
     604       21650 :                         ic = ic + incC
     605       21650 :                         matC(ic) = matC(ic) + temp * matA(irow, icol)
     606       28808 :                         dumm = dumm + CONJG_A(matA(irow, icol)) * matB(ib)
     607             :                     end do
     608        7158 :                     matC(jc) = matC(jc) + ALPHA * dumm
     609        7158 :                     jb = jb + incB
     610        8458 :                     jc = jc + incC
     611             :                 end do
     612             :             end if
     613             :         end block
     614             : #else
     615             : #error  "Unrecognized interface."
     616             : #endif
     617             : #endif
     618             : 
     619             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     620             : #elif   setMatMulAdd_ENABLED && (symm_ENABLED || hemm_ENABLED)
     621             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     622             : 
     623             : #if     !(BLAS_ENABLED && DISPATCH_ENABLED)
     624             :         integer(IK) :: idum, irow, icol
     625             : #endif
     626             :         ! Define the implied shapes and perform bound checks.
     627             : #if     ASS_ENABLED
     628             :         integer(IK) :: nrow, ncol
     629       14772 :         DECLARE_SET_DEFAULT_ALPHA_BETA
     630       14772 :         nrow = size(matC, 1, IK)
     631       14772 :         ncol = size(matC, 2, IK)
     632             :         ! Check bounds.
     633             : #if     CNA_ENABLED
     634       66474 :         CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, ncol]), \
     635             :         SK_"@setMatMulAdd(): The condition `all(shape(matA) == shape(matC))` must hold. shape(matA), nrow, ncol = "\
     636             :         //getStr([shape(matA, IK), nrow, ncol])) ! fpp
     637             : #else
     638       59088 :         CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, nrow]), \
     639             :         SK_"@setMatMulAdd(): The condition `all(shape(matA) == size(matC, 1))` must hold. shape(matA), size(matC, 1) = "\
     640             :         //getStr([shape(matA, IK), nrow])) ! fpp
     641             : #endif
     642             : #if     CNB_ENABLED
     643       66474 :         CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [nrow, ncol]), \
     644             :         SK_"@setMatMulAdd(): The condition `all(shape(matB) == shape(matC))` must hold. shape(matB), shape(matC) = "\
     645             :         //getStr([shape(matB, IK), nrow, ncol])) ! fpp
     646             : #else
     647       59088 :         CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ncol, ncol]), \
     648             :         SK_"@setMatMulAdd(): The condition `all(shape(matB) == size(matC, 2))` must hold. shape(matB), size(matC, 2) = "\
     649             :         //getStr([shape(matB, IK), ncol])) ! fpp
     650             : #endif
     651             : #elif   EXP_ENABLED
     652             :         ! Ensure offsets and subset bounds are logical.
     653       31218 :         CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol]), \
     654             :         SK_"@setMatMulAdd(): The condition `all(0_IK <= shape(matC))` must hold. shape(matC) = "//getStr([nrow, ncol])) ! fpp
     655       52030 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
     656             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
     657       52030 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matB, kind = IK)), \
     658             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffB, coffB])` must hold. roffB, coffB = "//getStr(1_IK - lbound(matB))) ! fpp
     659       52030 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matC, kind = IK)), \
     660             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffC, coffC])` must hold. roffC, coffC = "//getStr(1_IK - lbound(matC))) ! fpp
     661             :         ! Ensure subset of `matA` does not overflow the matrix bounds.
     662             : #if     CNA_ENABLED
     663       57244 :         CHECK_ASSERTION(__LINE__, all([nrow, ncol] <= ubound(matA, kind = IK)), \
     664             :         SK_"@setMatMulAdd(): The condition `all([nrow + roffA, ncol + coffA] <= shape(matA))` must hold. nrow, ncol, roffA, coffA, shape(matA) = "\
     665             :         //getStr([nrow, ncol, roffA, coffA, shape(matA, IK)])) ! fpp
     666             : #else
     667       52020 :         CHECK_ASSERTION(__LINE__, all([nrow, nrow] <= ubound(matA, kind = IK)), \
     668             :         SK_"@setMatMulAdd(): The condition `all([nrow + roffA, nrow + coffA] <= shape(matA))` must hold. nrow, roffA, coffA, shape(matA) = "\
     669             :         //getStr([nrow, roffA, coffA, shape(matA, IK)])) ! fpp
     670             : #endif
     671             :         ! Ensure subset of `matB` does not overflow the matrix bounds.
     672             : #if     CNB_ENABLED
     673       57222 :         CHECK_ASSERTION(__LINE__, all([nrow, ncol] <= ubound(matB, kind = IK)), \
     674             :         SK_"@setMatMulAdd(): The condition `all([nrow + roffB, ncol + coffB] <= shape(matB))` must hold. nrow, ncol, roffB, coffB, shape(matB) = "\
     675             :         //getStr([nrow, ncol, roffB, coffB, shape(matB, IK)])) ! fpp
     676             : #else
     677       52040 :         CHECK_ASSERTION(__LINE__, all([ncol, ncol] <= ubound(matB, kind = IK)), \
     678             :         SK_"@setMatMulAdd(): The condition `all([ncol + roffB, ncol + coffB] <= shape(matB))` must hold. ncol, roffB, coffB, shape(matB) = "\
     679             :         //getStr([ncol, roffB, coffB, shape(matB, IK)])) ! fpp
     680             : #endif
     681             : #else
     682             : #error  "Unrecognized interface."
     683             : #endif
     684             :         ! Quick return if possible.
     685       25178 :         if (nrow == 0_IK .or. ncol == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
     686       18242 :         if (ALPHA == ZERO) then
     687        3664 :             if (BETA == ZERO) then
     688             :                 do concurrent(icol = 1 : ncol, irow = 1 : nrow)
     689       59824 :                     matC(irow, icol) = ZERO
     690             :                 end do
     691             :             else
     692             :                 do concurrent(icol = 1 : ncol, irow = 1 : nrow)
     693       72016 :                     matC(irow, icol) = BETA * matC(irow, icol)
     694             :                 end do
     695             :             end if
     696        3664 :             return
     697             :         end if
     698             :         ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA(upper-triangular) * matB + beta * matC.
     699             : #if     SUA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
     700             :         call SHMM("L", "U", nrow, ncol, ALPHA, matA, size(matA, 1, IK), matB, size(matB, 1, IK), BETA, matC, size(matC, 1, IK))
     701             : #elif   SUA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED
     702             :         block
     703             :             TYPE_KIND :: temp, dumm
     704       24088 :             do icol = 1, ncol
     705      138768 :                 do irow = 1, nrow
     706       61640 :                         temp = ALPHA * matB(irow, icol)
     707       53040 :                         dumm = ZERO
     708      469416 :                         do idum = 1, irow - 1
     709      354736 :                             matC(idum, icol) = matC(idum, icol) + temp * matA(idum, irow)
     710      469416 :                             dumm = dumm + matB(idum, icol) * CONJG_A(matA(idum, irow))
     711             :                         end do
     712      135124 :                     if (BETA == ZERO) then
     713       34764 :                         matC(irow, icol) = temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
     714             :                     else
     715       79916 :                         matC(irow, icol) = BETA * matC(irow, icol) + temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
     716             :                     end if
     717             :                 end do
     718             :             end do
     719             :         end block
     720             :         ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA(lower-triangular) * matB + beta * matC.
     721             : #elif   SLA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
     722             :         call SHMM("L", "L", nrow, ncol, ALPHA, matA, size(matA, 1, IK), matB, size(matB, 1, IK), BETA, matC, size(matC, 1, IK))
     723             : #elif   SLA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED
     724             :         block
     725             :             TYPE_KIND :: temp, dumm
     726       24092 :             do icol = 1, ncol
     727      138768 :                 do irow = nrow, 1, -1
     728      114676 :                     temp = ALPHA * matB(irow, icol)
     729       53040 :                     dumm = ZERO
     730      469368 :                     do idum = irow + 1, nrow
     731      354692 :                         matC(idum, icol) = matC(idum, icol) + temp * matA(idum, irow)
     732      469368 :                         dumm = dumm + matB(idum, icol) * CONJG_A(matA(idum, irow))
     733             :                     end do
     734      135124 :                     if (BETA == ZERO) then
     735       34764 :                         matC(irow, icol) = temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
     736             :                     else
     737       79912 :                         matC(irow, icol) = BETA * matC(irow, icol) + temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
     738             :                     end if
     739             :                 end do
     740             :             end do
     741             :         end block
     742             :         ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA * matB(triangular) + beta * matC.
     743             : #elif   SFA_ENABLED && SUB_ENABLED && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
     744             :         call SHMM("R", "U", ncol, nrow, ALPHA, matB, size(matB, 1, IK), matA, size(matA, 1, IK), BETA, matC, size(matC, 1, IK))
     745             : #elif   SFA_ENABLED && SLB_ENABLED && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
     746             :         call SHMM("R", "L", ncol, nrow, ALPHA, matB, size(matB, 1, IK), matA, size(matA, 1, IK), BETA, matC, size(matC, 1, IK))
     747             : #elif   SFA_ENABLED && (SUB_ENABLED || SLB_ENABLED) && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED)
     748             :         block
     749             :             TYPE_KIND :: temp
     750       48180 :             do icol = 1, ncol
     751       40890 :                 temp = ALPHA * GET_RE(matB(icol, icol))
     752       40890 :                 if (BETA == ZERO) then
     753       82076 :                     do irow = 1, nrow
     754       82076 :                         matC(irow, icol) = temp * matA(irow, icol)
     755             :                     end do
     756             :                 else
     757      188136 :                     do irow = 1, nrow
     758      188136 :                         matC(irow, icol) = BETA * matC(irow, icol) + temp * matA(irow, icol)
     759             :                     end do
     760             :                 end if
     761      166440 :                 do idum = 1, icol - 1
     762             : #if                 SUB_ENABLED
     763       62775 :                     temp = ALPHA * matB(idum, icol)
     764             : #elif               SLB_ENABLED
     765       62775 :                     temp = ALPHA * CONJG_B(matB(icol, idum))
     766             : #else
     767             : #error              "Unrecognized interface."
     768             : #endif
     769      859350 :                     do irow = 1, nrow
     770      818460 :                         matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
     771             :                     end do
     772             :                 end do
     773      173730 :                 do idum = icol + 1, ncol
     774             : #if                 SUB_ENABLED
     775       62775 :                     temp = ALPHA * CONJG_B(matB(icol, idum))
     776             : #elif               SLB_ENABLED
     777       62775 :                     temp = ALPHA * matB(idum, icol)
     778             : #else
     779             : #error              "Unrecognized interface."
     780             : #endif
     781      859350 :                     do irow = 1, nrow
     782      818460 :                         matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
     783             :                     end do
     784             :                 end do
     785             :             end do
     786             :         end block
     787             : #else
     788             : #error  "Unrecognized interface."
     789             : #endif
     790             : 
     791             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     792             : #elif   setMatMulAdd_ENABLED && gemm_ENABLED
     793             :         !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     794             : 
     795             :         integer(IK) :: irow, icol
     796             : #if     !(BLAS_ENABLED && DISPATCH_ENABLED)
     797             :         integer(IK) :: idum
     798             : #endif
     799             :         ! Define the implied shapes and perform bound checks.
     800             : #if     ASS_ENABLED
     801             :         integer(IK) :: nrow, ncol, ndum
     802       16150 :         DECLARE_SET_DEFAULT_ALPHA_BETA
     803       16150 :         nrow = size(matC, 1, IK)
     804       16150 :         ncol = size(matC, 2, IK)
     805             :         ! Set the length of the dummy axis of `matA` but only if it is a general asymmetric matrix.
     806             : #if     SFA_ENABLED && TNA_ENABLED
     807        5384 :         ndum = size(matA, 2, IK)
     808             : #elif   SFA_ENABLED && (TSA_ENABLED || THA_ENABLED)
     809       10766 :         ndum = size(matA, 1, IK)
     810             : #else
     811             : #error  "Unrecognized interface."
     812             : #endif
     813             :         ! Check bounds.
     814             : #if     TNA_ENABLED
     815       48456 :         CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, ndum]), \
     816             :         SK_"@setMatMulAdd(): The condition `all(shape(matA) == [nrow, ndum])` must hold. shape(matA), nrow, ndum = "\
     817             :         //getStr([shape(matA, IK), nrow, ndum])) ! fpp
     818             : #else
     819       96894 :         CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [ndum, nrow]), \
     820             :         SK_"@setMatMulAdd(): The condition `all(shape(matA) == [ndum, nrow])` must hold. shape(matA), ndum, nrow = "\
     821             :         //getStr([shape(matA, IK), ndum, nrow])) ! fpp
     822             : #endif
     823             : #if     TNB_ENABLED
     824       48474 :         CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ndum, ncol]), \
     825             :         SK_"@setMatMulAdd(): The condition `all(shape(matB) == [ndum, ncol])` must hold. shape(matB), ndum, ncol = "\
     826             :         //getStr([shape(matB, IK), ndum, ncol])) ! fpp
     827             : #else
     828       96876 :         CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ncol, ndum]), \
     829             :         SK_"@setMatMulAdd(): The condition `all(shape(matB) == [ncol, ndum])` must hold. shape(matB), ncol, ndum = "\
     830             :         //getStr([shape(matB, IK), ncol, ndum])) ! fpp
     831             : #endif
     832             : #elif   EXP_ENABLED
     833             :         ! Ensure offsets and subset bounds are logical.
     834       50376 :         CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol, ndum]), \
     835             :         SK_"@setMatMulAdd(): The condition `all(0_IK <= [nrow, ncol, ndum])` must hold. nrow, ncol, ndum = "//getStr([nrow, ncol, ndum])) ! fpp
     836       62970 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
     837             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
     838       62970 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matB, kind = IK)), \
     839             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffB, coffB])` must hold. roffB, coffB = "//getStr(1_IK - lbound(matB))) ! fpp
     840       62970 :         CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matC, kind = IK)), \
     841             :         SK_"@setMatMulAdd(): The condition `all(0 <= [roffC, coffC])` must hold. roffC, coffC = "//getStr(1_IK - lbound(matC))) ! fpp
     842             :         ! Ensure subset of `matA` does not overflow the matrix bounds.
     843             : #if     TNA_ENABLED
     844       47839 :         CHECK_ASSERTION(__LINE__, all([roffA + nrow, coffA + ndum] <= shape(matA, kind = IK)), \
     845             :         SK_"@setMatMulAdd(): The condition `all([roffA + nrow, coffA + ndum] <= shape(matA))` must hold. roffA, coffA, nrow, ndum, shape(matA) = "\
     846             :         //getStr([roffA, coffA, nrow, ndum, shape(matA, IK)])) ! fpp
     847             : #elif   TSA_ENABLED || THA_ENABLED
     848       90695 :         CHECK_ASSERTION(__LINE__, all([roffA + ndum, coffA + nrow] <= shape(matA, IK)), \
     849             :         SK_"@setMatMulAdd(): The condition `all([roffA + ndum, coffA + nrow] <= shape(matA))` must hold. roffA, coffA, nrow, ndum, shape(matA) = "\
     850             :         //getStr([roffA, coffA, nrow, ndum, shape(matA, IK)])) ! fpp
     851             : #else
     852             : #error  "Unrecognized interface."
     853             : #endif
     854             :         ! Ensure subset of `matB` does not overflow the matrix bounds.
     855             : #if     TNB_ENABLED
     856       47828 :         CHECK_ASSERTION(__LINE__, all([roffB + ndum, coffB + ncol] <= shape(matB, IK)), \
     857             :         SK_"@setMatMulAdd(): The condition `all([roffB + ndum, coffB + ncol] <= shape(matB))` must hold. roffB, coffB, ncol, ndum, shape(matB) = "\
     858             :         //getStr([roffB, coffB, ncol, ndum, shape(matB, IK)])) ! fpp
     859             : #elif   TSB_ENABLED || THB_ENABLED
     860       90706 :         CHECK_ASSERTION(__LINE__, all([roffB + ncol, coffB + ndum] <= shape(matB, IK)), \
     861             :         SK_"@setMatMulAdd(): The condition `all([roffB + ncol, coffB + ndum] <= shape(matB))` must hold. roffB, coffB, ncol, ndum, shape(matB) = "\
     862             :         //getStr([roffB, coffB, ncol, ndum, shape(matB, IK)])) ! fpp
     863             : #else
     864             : #error  "Unrecognized interface."
     865             : #endif
     866             : #else
     867             : #error  "Unrecognized interface."
     868             : #endif
     869             :         ! Quick return if possible.
     870       28744 :         if (nrow == 0_IK .or. ncol == 0_IK .or. ((ALPHA == ZERO .or. ndum == 0_IK) .and. BETA == ONE)) return
     871       19751 :         if (ALPHA == ZERO .or. ndum == 0_IK) then ! The condition `.or. ndum == 0_IK` is extra to BLAS here, to bypass the implicit-interface limitation of BLAS.
     872        5148 :             if (BETA == ZERO) then
     873             :                 do concurrent(icol = 1 : ncol, irow = 1 : nrow)
     874       87084 :                     matC(irow, icol) = ZERO
     875             :                 end do
     876             :             else
     877             :                 do concurrent(icol = 1 : ncol, irow = 1 : nrow)
     878      103707 :                     matC(irow, icol) = BETA * matC(irow, icol)
     879             :                 end do
     880             :             end if
     881        5148 :             return
     882             :         end if
     883             :         ! BLAS 3 GEMM: Form matC := alpha * matA * matB + beta * matC.
     884             :         ! \todo There is a lot of room for improvement here, particularly when order of matC is large.
     885             :         ! For example, the BETA value check should be completely taken out of the icol loop.
     886             : #if     SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && TNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
     887             :         call GEMM(TMA, TMB, ALPHA, BETA)
     888             : #elif   SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && TNB_ENABLED
     889             :         block
     890             :             TYPE_KIND :: temp
     891             : #if         0
     892             :             ! assume BETA /= ZERO .and. BETA /= ONE
     893             :             do icol = 1, ncol
     894             :                 idum = 1
     895             :                 temp = ALPHA * matB(idum, icol)
     896             :                 do irow = 1, nrow
     897             :                     matC(irow, icol) = BETA * matC(irow, icol) + temp * matA(irow, idum)
     898             :                 end do
     899             :                 do idum = 2, ndum
     900             :                     temp = ALPHA * matB(idum, icol)
     901             :                     do irow = 1, nrow
     902             :                         matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
     903             :                     end do
     904             :                 end do
     905             :             end do
     906             : #else
     907       10643 :             do icol = 1, ncol
     908        9031 :                 if (BETA == ZERO) then
     909       18054 :                     matC(1 : nrow, icol) = ZERO
     910        6292 :                 elseif (BETA /= ONE) then
     911       18637 :                     do irow = 1, nrow
     912       18637 :                         matC(irow, icol) = BETA * matC(irow, icol)
     913             :                     end do
     914             :                 end if
     915       61386 :                 do idum = 1, ndum
     916       50743 :                     temp = ALPHA * matB(idum, icol)
     917      340806 :                     do irow = 1, nrow
     918      331775 :                         matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
     919             :                     end do
     920             :                 end do
     921             :             end do
     922             :         end block
     923             : #endif
     924             :         ! BLAS 3 GEMM: Form matC := alpha * transpose(matA) * matB + beta * matC
     925             : #elif   SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && TNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
     926             :         call GEMM(TMA, TMB, ALPHA, BETA)
     927             : #elif   SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && TNB_ENABLED
     928             :         block
     929             :             TYPE_KIND :: temp
     930       21437 :             do icol = 1, ncol
     931      121671 :                 do irow = 1, nrow
     932       46657 :                     temp = ZERO
     933      662300 :                     do idum = 1, ndum
     934      662300 :                         temp = temp + CONJG_A(matA(idum, irow)) * matB(idum, icol)
     935             :                     end do
     936      118390 :                     if (BETA == ZERO) then
     937       30626 :                         matC(irow, icol) = ALPHA * temp
     938             :                     else
     939       69608 :                         matC(irow, icol) = ALPHA * temp + BETA * matC(irow, icol)
     940             :                     end if
     941             :                 end do
     942             :             end do
     943             :         end block
     944             :         ! BLAS 3 GEMM: Form matC := alpha * matA * transpose(matB) + beta * matC
     945             : #elif   SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && (TSB_ENABLED || THB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
     946             :         call GEMM(TMA, TMB, ALPHA, BETA)
     947             : #elif   SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && (TSB_ENABLED || THB_ENABLED)
     948             :         block
     949             :             TYPE_KIND :: temp
     950       21461 :             do icol = 1, ncol
     951       18179 :                 if (BETA == ZERO) then
     952       36108 :                     matC(1 : nrow, icol) = ZERO
     953       12702 :                 else if (BETA /= ONE) then
     954       37190 :                     do irow = 1, nrow
     955       37190 :                         matC(irow, icol) = BETA * matC(irow, icol)
     956             :                     end do
     957             :                 end if
     958      123207 :                 do idum = 1, ndum
     959      101746 :                     temp = ALPHA * CONJG_B(matB(icol, idum))
     960      682021 :                     do irow = 1, nrow
     961      663842 :                         matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
     962             :                     end do
     963             :                 end do
     964             :             end do
     965             :         end block
     966             :         ! BLAS 3 GEMM: Form matC := alpha * transpose(matA) * transpose(matB) + beta * matC
     967             : #elif   SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && (TSB_ENABLED || THB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
     968             :         call GEMM(TMA, TMB, ALPHA, BETA)
     969             : #elif   SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && (TSB_ENABLED || THB_ENABLED)
     970             :         block
     971             :             TYPE_KIND :: temp
     972       42496 :             do icol = 1, ncol
     973      242464 :                 do irow = 1, nrow
     974       93032 :                     temp = ZERO
     975     1322896 :                     do idum = 1, ndum
     976     1322896 :                         temp = temp + CONJG_A(matA(idum, irow)) * CONJG_B(matB(icol, idum))
     977             :                     end do
     978      236036 :                     if (BETA == ZERO) then
     979       61244 :                         matC(irow, icol) = ALPHA * temp
     980             :                     else
     981      138724 :                         matC(irow, icol) = ALPHA * temp + BETA * matC(irow, icol)
     982             :                     end if
     983             :                 end do
     984             :             end do
     985             :         end block
     986             : #else
     987             : #error  "Unrecognized interface."
     988             : #endif
     989             : 
     990             : #else
     991             :         !%%%%%%%%%%%%%%%%%%%%%%%%
     992             : #error  "Unrecognized interface."
     993             :         !%%%%%%%%%%%%%%%%%%%%%%%%
     994             : #endif
     995             : #undef  DECLARE_SET_DEFAULT_ALPHA_BETA
     996             : #undef  TYPE_KIND
     997             : #undef  DECLARE
     998             : #undef  CONJG_A
     999             : #undef  CONJG_B
    1000             : #undef  GET_RE
    1001             : #undef  ALPHA
    1002             : #undef  BETA
    1003             : #undef  ndum
    1004             : #undef  SHMM
    1005             : #undef  GEMM
    1006             : #undef  LENB
    1007             : #undef  LENC
    1008             : #undef  TMA
    1009             : #undef  TMB
    1010             : #undef  PMV

ParaMonte: Parallel Monte Carlo and Machine Learning Library 
The Computational Data Science Lab
© Copyright 2012 - 2024