Line data Source code
1 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3 : !!!! !!!!
4 : !!!! ParaMonte: Parallel Monte Carlo and Machine Learning Library. !!!!
5 : !!!! !!!!
6 : !!!! Copyright (C) 2012-present, The Computational Data Science Lab !!!!
7 : !!!! !!!!
8 : !!!! This file is part of the ParaMonte library. !!!!
9 : !!!! !!!!
10 : !!!! LICENSE !!!!
11 : !!!! !!!!
12 : !!!! https://github.com/cdslaborg/paramonte/blob/main/LICENSE.md !!!!
13 : !!!! !!!!
14 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
15 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
16 :
17 : !> \brief
18 : !> This include file contains procedure implementation of the generic interfaces of [pm_matrixMulAdd](@ref pm_matrixMulAdd).
19 : !>
20 : !> \finmain
21 : !>
22 : !> \author
23 : !> \AmirShahmoradi, Apr 21, 2017, 1:54 AM, Institute for Computational Engineering and Sciences (ICES), The University of Texas at Austin
24 :
25 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
26 :
27 : ! Define the default optional constants.
28 : #if ASS_ENABLED && (gemv_ENABLED || spmv_ENABLED || hpmv_ENABLED || symm_ENABLED || symv_ENABLED || hemv_ENABLED || hemm_ENABLED || gemm_ENABLED)
29 : #define BETA beta_def
30 : #define ALPHA alpha_def
31 : #define DECLARE_SET_DEFAULT_ALPHA_BETA \
32 : TYPE_KIND :: alpha_def, beta_def; \
33 : beta_def = ONE; alpha_def = ONE; \
34 : if (present(beta)) beta_def = beta; \
35 : if (present(alpha)) alpha_def = alpha;
36 : #elif !EXP_ENABLED
37 : #error "Unrecognized interface."
38 : #endif
39 : ! Define the transposition of `matA` for BLAS gemv/gemm call routines.
40 : #if TNA_ENABLED
41 : #define TMA "N"
42 : #elif TSA_ENABLED
43 : #define TMA "T"
44 : #elif THA_ENABLED
45 : #define TMA "C"
46 : #endif
47 : ! Define the transposition of `matB` for BLAS gemm call routines.
48 : #if TNB_ENABLED
49 : #define TMB "N"
50 : #elif TSB_ENABLED
51 : #define TMB "T"
52 : #elif THB_ENABLED
53 : #define TMB "C"
54 : #endif
55 : ! Define the call interface for BLAS symm/hemm call routines.
56 : #if CHA_ENABLED && CHB_ENABLED
57 : #define SHMM blasHEMM
58 : #elif CSA_ENABLED || CNA_ENABLED || CSB_ENABLED || CNB_ENABLED
59 : #define SHMM blasSYMM
60 : #endif
61 : ! Define blas gemm call interface.
62 : #define GEMM(transA, tranB, alpha, beta) \
63 : blasGEMM(transA, tranB, nrow, ncol, ndum, alpha, matA(1, 1), size(matA, 1, IK), matB(1, 1), size(matB, 1, IK), beta, matC(1, 1), size(matC, 1, IK))
64 : ! Define matA transpose form.
65 : #if CK_ENABLED && (CHA_ENABLED || THA_ENABLED)
66 : #define CONJG_A(X) conjg(X)
67 : #elif THA_ENABLED || TSA_ENABLED || TNA_ENABLED || CHA_ENABLED || CSA_ENABLED || CNA_ENABLED
68 : #define CONJG_A(X) X
69 : #else
70 : #error "Unrecognized interface."
71 : #endif
72 : ! Define matB transpose form.
73 : #if CK_ENABLED && (CHB_ENABLED || THB_ENABLED)
74 : #define CONJG_B(X) conjg(X)
75 : #elif THB_ENABLED || TSB_ENABLED || TNB_ENABLED || CHB_ENABLED || CSB_ENABLED || CNB_ENABLED
76 : #define CONJG_B(X) X
77 : #else
78 : #error "Unrecognized interface."
79 : #endif
80 : ! Define the `real` component function.
81 : #if CK_ENABLED && (THA_ENABLED || THB_ENABLED || CHA_ENABLED || CHB_ENABLED)
82 : #define GET_RE(X) X%re
83 : #elif THA_ENABLED || TSA_ENABLED || TNA_ENABLED || THB_ENABLED || TSB_ENABLED || TNB_ENABLED || \
84 : CHA_ENABLED || CSA_ENABLED || CNA_ENABLED || CHB_ENABLED || CSB_ENABLED || CNB_ENABLED
85 : #define GET_RE(X) X
86 : #else
87 : #error "Unrecognized interface."
88 : #endif
89 : ! Define the additional temporary variable for triangular `matA`.
90 : !#if SLA_ENABLED || SUA_ENABLED || D2_D1_ENABLED || D1_D1_ENABLED
91 : !#define ndum nrow
92 : !#elif SLB_ENABLED || SUB_ENABLED
93 : !#define ndum ncol
94 : !#elif SFA_ENABLED && SFB_ENABLED
95 : !#else
96 : !#error "Unrecognized interface."
97 : !#endif
98 : ! Declare constants and temporary variables.
99 : #if IK_ENABLED
100 : #define TYPE_KIND integer(IKC)
101 : integer(IKC), parameter :: ZERO = 0_IKC, ONE = 1_IKC
102 : #elif CK_ENABLED
103 : #define TYPE_KIND complex(CKC)
104 : complex(CKC), parameter :: ZERO = (0._CKC, 0._CKC), ONE = (1._CKC, 0._CKC)
105 : #elif RK_ENABLED
106 : #define TYPE_KIND real(RKC)
107 : real(RKC) , parameter :: ZERO = 0._RKC, ONE = 1._RKC
108 : #else
109 : #error "Unrecognized interface."
110 : #endif
111 :
112 : ! !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
113 : !#if setMatMulAdd_ENABLED && axpby_ENABLED
114 : ! !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115 : !
116 : ! integer(IK) :: nell, iell, bell, cell
117 : ! if (matA == ZERO) return
118 : ! nell =
119 : !#if ASS_ENABLED
120 : ! ! Code for both increments equal to 1
121 : ! do iell = 1, nell
122 : ! matC(iell) = matA * matB(iell) + matC(iell)
123 : ! end do
124 : !#elif EXP_ENABLED
125 : ! if (incB == 1 .and. incC == 1) then
126 : ! else ! code for unequal increments or equal increments not equal to 1.
127 : ! bell = 1
128 : ! cell = 1
129 : ! if (incB < 0) bell = (1 - nell) * incB + 1
130 : ! if (incC < 0) cell = (1 - nell) * incC + 1
131 : ! do iell = 1, nell
132 : ! matC(cell) = matC(cell) + matA * matB(bell)
133 : ! bell = bell + incB
134 : ! cell = cell + incC
135 : ! end do
136 : ! end if
137 : !#endif
138 :
139 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
140 : #if setMatMulAdd_ENABLED && gemv_ENABLED
141 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
142 :
143 : ! Define the implied shapes and perform bound checks.
144 : #if !(BLAS_ENABLED && DISPATCH_ENABLED)
145 : integer(IK) :: irow, icol, iell, bstart, cstart
146 : #endif
147 : #if ASS_ENABLED
148 : integer(IK) :: nrow, ncol
149 : integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
150 5454 : DECLARE_SET_DEFAULT_ALPHA_BETA
151 5454 : nrow = size(matA, 1, IK)
152 5454 : ncol = size(matA, 2, IK)
153 : #elif EXP_ENABLED
154 : ! Ensure offsets and subset bounds are logical.
155 11736 : CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol]), \
156 : SK_"@setMatMulAdd(): The condition `all(0_IK <= [nrow, ncol])` must hold. nrow, ncol = "//getStr([nrow, ncol])) ! fpp
157 19560 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
158 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
159 : ! Ensure subset of `matA` does not overflow the matrix bounds.
160 43032 : CHECK_ASSERTION(__LINE__, all([roffA + nrow, coffA + ncol] <= shape(matA, kind = IK)), \
161 : SK_"@setMatMulAdd(): The condition `all([roffA + nrow, coffA + ncol] <= shape(matA))` must hold. roffA, coffA, nrow, ncol, shape(matA) = "\
162 : //getStr([roffA, coffA, nrow, ncol, shape(matA, IK)])) ! fpp
163 : #else
164 : #error "Unrecognized interface."
165 : #endif
166 : ! Check bounds.
167 : #if TNA_ENABLED
168 : #define LENB ncol
169 : #define LENC nrow
170 21896 : CHECK_ASSERTION(__LINE__, all([ncol, nrow] == [size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)]), \
171 : SK_"@setMatMulAdd(): The condition `all([nrow, ncol] == [size(matC(::abs(incC))), size(matB(::abs(incB)))])` must hold. [nrow, ncol], size(matC(::abs(incC))), size(matB(::abs(incB))) = "\
172 : //getStr([[nrow, ncol], size(matC(::abs(incC)), 1, IK), size(matB(::abs(incB)), 1, IK)])) ! fpp
173 : #elif TSA_ENABLED || THA_ENABLED
174 : #define LENB nrow
175 : #define LENC ncol
176 68618 : CHECK_ASSERTION(__LINE__, all([nrow, ncol] == [size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)]), \
177 : SK_"@setMatMulAdd(): The condition `all([nrow, ncol] == [size(matB(::abs(incB))), size(matC(::abs(incC)))])` must hold. [nrow, ncol], size(matB(::abs(incB))), size(matC(::abs(incC))) = "\
178 : //getStr([shape(matA, IK), [nrow, ncol], size(matB(::abs(incB)), 1, IK), size(matC(::abs(incC)), 1, IK)])) ! fpp
179 : #endif
180 : ! Quick return if possible.
181 9366 : if (nrow == 0_IK .or. ncol == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
182 : #if BLAS_ENABLED && DISPATCH_ENABLED
183 : call blasGEMV(TMA, nrow, ncol, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
184 : #else
185 : ! Set `lenB` and `lenC`, the lengths of the vectors `matB` and `matC`, and set up the start points in `matB` and `matC`.
186 2814 : if (0_IK < incB) then
187 : bstart = 1
188 : else
189 1380 : bstart = 1 - (LENB - 1) * incB
190 : end if
191 2814 : if (0_IK < incC) then
192 : cstart = 1
193 : else
194 1374 : cstart = 1 - (LENC - 1) * incC
195 : end if
196 : ! Start the operations. in this version the elements of matA are accessed sequentially with one pass through matA.
197 : ! First form matC := beta * matC.
198 6651 : if (BETA /= ONE) then
199 2134 : if (incC == 1_IK) then
200 2906 : if (BETA == ZERO) then
201 9179 : matC(1 : LENC) = ZERO
202 : else
203 : do concurrent(irow = 1 : LENC)
204 9762 : matC(irow) = BETA * matC(irow)
205 : end do
206 : end if
207 : else
208 : iell = cstart
209 1731 : if (BETA == ZERO) then
210 5394 : do irow = 1, LENC
211 4542 : matC(iell) = ZERO
212 5394 : iell = iell + incC
213 : end do
214 : else
215 5688 : do irow = 1, LENC
216 4809 : matC(iell) = BETA * matC(iell)
217 5688 : iell = iell + incC
218 : end do
219 : end if
220 : end if
221 : end if
222 6651 : if (ALPHA == ZERO) return
223 : #if TNA_ENABLED
224 : ! Form matC := alpha * matA * matB + matC.
225 : block
226 : integer(IK) :: cell
227 : TYPE_KIND :: temp
228 : iell = bstart
229 711 : if (incC == 1_IK) then
230 7535 : do icol = 1, ncol
231 6373 : temp = ALPHA * matB(iell)
232 42511 : do irow = 1, nrow
233 42511 : matC(irow) = matC(irow) + temp * matA(irow, icol)
234 : end do
235 7535 : iell = iell + incB
236 : end do
237 : else
238 3934 : do icol = 1, ncol
239 3333 : temp = ALPHA * matB(iell)
240 : cell = cstart
241 21903 : do irow = 1, nrow
242 18570 : matC(cell) = matC(cell) + temp * matA(irow, icol)
243 21903 : cell = cell + incC
244 : end do
245 3934 : iell = iell + incB
246 : end do
247 : end if
248 : end block
249 : #elif TSA_ENABLED || THA_ENABLED
250 : ! Form matC := alpha * matA**T * matB + matC or matC := alpha * matA**H * matB + matC.
251 : block
252 : TYPE_KIND :: temp
253 : iell = cstart
254 1413 : if (incB == 1_IK) then
255 15476 : do icol = 1, ncol
256 5960 : temp = ZERO
257 85116 : do irow = 1, nrow
258 85116 : temp = temp + CONJG_A(matA(irow, icol)) * matB(irow)
259 : end do
260 13154 : matC(iell) = matC(iell) + ALPHA * temp
261 15476 : iell = iell + incC
262 : end do
263 : else
264 7866 : do icol = 1, ncol
265 3194 : temp = ZERO
266 : cstart = bstart
267 43918 : do irow = 1, nrow
268 37238 : temp = temp + CONJG_A(matA(irow, icol)) * matB(cstart)
269 43918 : cstart = cstart + incB
270 : end do
271 6680 : matC(iell) = matC(iell) + ALPHA * temp
272 7866 : iell = iell + incC
273 : end do
274 : end if
275 : end block
276 : #else
277 : #error "Unrecognized interface."
278 : #endif
279 : #endif
280 :
281 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
282 : #elif setMatMulAdd_ENABLED && (spmv_ENABLED || hpmv_ENABLED)
283 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
284 :
285 : #if spmv_ENABLED
286 : #define PMV blasSPMV
287 : #elif hpmv_ENABLED
288 : #define PMV blasHPMV
289 : #else
290 : #error "Unrecognized interface."
291 : #endif
292 : #if !(BLAS_ENABLED && DISPATCH_ENABLED)
293 : integer(IK) :: ib, ic, jb, jc, bstart, cstart, irow, icol, idum, jdum
294 : #endif
295 : ! Define the implied shapes and perform bound checks.
296 : #if ASS_ENABLED
297 : integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
298 : integer(IK) :: ndim
299 7296 : DECLARE_SET_DEFAULT_ALPHA_BETA
300 7296 : ndim = size(matB, 1, IK)
301 21888 : CHECK_ASSERTION(__LINE__, ndim == size(matC, kind = IK), \
302 : SK_"@setMatMulAdd(): The condition `size(matB) == size(matC)` must hold. size(matB), size(matC) = "\
303 : //getStr([ndim, size(matC, kind = IK)])) ! fpp
304 : #elif EXP_ENABLED
305 5200 : CHECK_ASSERTION(__LINE__, 0_IK <= ndim, \
306 : SK_"@setMatMulAdd(): The condition `0 <= ndim` must hold. ndim = "//getStr(ndim)) ! fpp
307 15600 : CHECK_ASSERTION(__LINE__, all(0_IK /= [incB, incC]), \
308 : SK_"@setMatMulAdd(): The condition `all(0 /= [incB, incC])` must hold. ndim, roffA, coffA, incB, incC = "//\
309 : getStr([incB, incC])) ! fpp
310 15600 : CHECK_ASSERTION(__LINE__, abs(incB) * (ndim - 1) < size(matB, kind = IK), \
311 : SK_"@setMatMulAdd(): The condition `abs(incB) * (ndim - 1) < size(matB(:))` must hold. incB, size(matB) = "//\
312 : getStr([incB, size(matB, kind = IK)])) ! fpp
313 15600 : CHECK_ASSERTION(__LINE__, abs(incC) * (ndim - 1) < size(matC, kind = IK), \
314 : SK_"@setMatMulAdd(): The condition `abs(incC) * (ndim - 1) < size(matC(:))` must hold. incC, size(matC) = "//\
315 : getStr([incC, size(matC, kind = IK)])) ! fpp
316 : #else
317 : #error "Unrecognized interface."
318 : #endif
319 37488 : CHECK_ASSERTION(__LINE__, ndim * (ndim + 1_IK) / 2_IK == size(matA, kind = IK), \
320 : SK_"@setMatMulAdd(): The condition `ndim * (ndim + 1) / 2 == size(matA)` must hold. ndim, size(matA) = "\
321 : //getStr([ndim, size(matA, kind = IK)])) ! fpp
322 : ! Quick return if possible.
323 12496 : if (ndim == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
324 : #if BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SUA_ENABLED
325 : call PMV("U", ndim, ALPHA, matA(1), matB(1), incB, BETA, matC(1), incC)
326 : #elif BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SLA_ENABLED
327 : call PMV("L", ndim, ALPHA, matA(1), matB(1), incB, BETA, matC(1), incC)
328 : #else
329 : ! Set up the start points in matC.
330 4124 : if (incC > 0_IK) then
331 : cstart = 1_IK
332 : else
333 2052 : cstart = 1_IK - (ndim - 1_IK) * incC
334 : end if
335 : ! Start the operations. In this version the elements of the array matA
336 : ! are accessed sequentially with ONE pass through matA.
337 : ! First Form matC := BETA * matC.
338 9828 : if (BETA /= ONE) then
339 3140 : if (incC == 1_IK) then
340 4224 : if (BETA == ZERO) then
341 14112 : do irow = 1_IK, ndim
342 14112 : matC(irow) = ZERO
343 : end do
344 : else
345 13904 : do irow = 1_IK, ndim
346 13904 : matC(irow) = BETA * matC(irow)
347 : end do
348 : end if
349 : else
350 : ic = cstart
351 2640 : if (BETA == ZERO) then
352 8652 : do irow = 1_IK, ndim
353 7352 : matC(ic) = ZERO
354 8652 : ic = ic + incC
355 : end do
356 : else
357 8712 : do irow = 1_IK, ndim
358 7372 : matC(ic) = BETA * matC(ic)
359 8712 : ic = ic + incC
360 : end do
361 : end if
362 : end if
363 : end if
364 9828 : if (ALPHA == ZERO) return
365 : ! Set up the start points in matB.
366 3096 : if (incB > 0_IK) then
367 : bstart = 1_IK
368 : else
369 1516 : bstart = 1_IK - (ndim - 1_IK) * incB
370 : end if
371 : jdum = 1_IK
372 : #if SUA_ENABLED
373 : ! Form matC when matA contains the upper triangle.
374 : block
375 : TYPE_KIND :: temp, dumm
376 1548 : if (incB == 1_IK .and. incC == 1_IK) then
377 16768 : do icol = 1_IK, ndim
378 7828 : temp = ALPHA * matB(icol)
379 6352 : dumm = ZERO
380 : idum = jdum
381 56822 : do irow = 1_IK,icol - 1_IK
382 42642 : matC(irow) = matC(irow) + temp * matA(idum)
383 42642 : dumm = dumm + CONJG_A(matA(idum)) * matB(irow)
384 56822 : idum = idum + 1_IK
385 : end do
386 14180 : matC(icol) = matC(icol) + temp * GET_RE(matA(jdum + icol - 1)) + ALPHA * dumm
387 2588 : jdum = jdum + icol
388 : end do
389 : else
390 : jb = bstart
391 : jc = cstart
392 8450 : do icol = 1_IK, ndim
393 7152 : temp = ALPHA * matB(jb)
394 3308 : dumm = ZERO
395 : ib = bstart
396 : ic = cstart
397 28796 : do idum = jdum, jdum + icol - 2
398 21644 : matC(ic) = matC(ic) + temp * matA(idum)
399 21644 : dumm = dumm + CONJG_A(matA(idum)) * matB(ib)
400 21644 : ib = ib + incB
401 28796 : ic = ic + incC
402 : end do
403 7152 : matC(jc) = matC(jc) + temp * GET_RE(matA(jdum + icol - 1)) + ALPHA * dumm
404 7152 : jb = jb + incB
405 7152 : jc = jc + incC
406 1298 : jdum = jdum + icol
407 : end do
408 : end if
409 : end block
410 : #elif SLA_ENABLED
411 : ! Form matC when matA contains the lower triangle.
412 : block
413 : TYPE_KIND :: temp, dumm
414 1548 : if (incB == 1_IK .and. incC == 1_IK) then
415 16768 : do icol = 1_IK, ndim
416 14180 : temp = ALPHA * matB(icol)
417 6352 : dumm = ZERO
418 14180 : matC(icol) = matC(icol) + temp * GET_RE(matA(jdum))
419 14180 : idum = jdum + 1_IK
420 56822 : do irow = icol + 1_IK, ndim
421 42642 : matC(irow) = matC(irow) + temp * matA(idum)
422 42642 : dumm = dumm + CONJG_A(matA(idum)) * matB(irow)
423 56822 : idum = idum + 1_IK
424 : end do
425 14180 : matC(icol) = matC(icol) + ALPHA * dumm
426 16768 : jdum = jdum + (ndim - icol + 1)
427 : end do
428 : else
429 : jb = bstart
430 : jc = cstart
431 8450 : do icol = 1_IK, ndim
432 7152 : temp = ALPHA * matB(jb)
433 3308 : dumm = ZERO
434 7152 : matC(jc) = matC(jc) + temp * GET_RE(matA(jdum))
435 : ib = jb
436 : ic = jc
437 28796 : do idum = jdum + 1_IK, jdum + ndim - icol
438 21644 : ib = ib + incB
439 21644 : ic = ic + incC
440 21644 : matC(ic) = matC(ic) + temp * matA(idum)
441 28796 : dumm = dumm + CONJG_A(matA(idum)) * matB(ib)
442 : end do
443 7152 : matC(jc) = matC(jc) + ALPHA * dumm
444 7152 : jb = jb + incB
445 7152 : jc = jc + incC
446 8450 : jdum = jdum + (ndim - icol + 1)
447 : end do
448 : end if
449 : end block
450 : #else
451 : #error "Unrecognized interface."
452 : #endif
453 : #endif
454 :
455 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
456 : #elif setMatMulAdd_ENABLED && (symv_ENABLED || hemv_ENABLED)
457 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
458 :
459 : #if symv_ENABLED
460 : #define PMV blasSYMV
461 : #elif hemv_ENABLED
462 : #define PMV blasHEMV
463 : #else
464 : #error "Unrecognized interface."
465 : #endif
466 : #if !(BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && (SLA_ENABLED || SUA_ENABLED))
467 : integer(IK) :: ib, ic, jb, jc, bstart, cstart, irow, icol
468 : #endif
469 : ! Define the implied shapes and perform bound checks.
470 : #if ASS_ENABLED
471 : integer(IK) , parameter :: incB = 1_IK, incC = 1_IK
472 : integer(IK) :: ndim
473 7320 : DECLARE_SET_DEFAULT_ALPHA_BETA
474 7320 : ndim = size(matA, 1, IK)
475 87840 : CHECK_ASSERTION(__LINE__, all(ndim == [size(matA, 2, IK), shape(matB, IK), shape(matC, IK)]), \
476 : SK_"@setMatMulAdd(): The condition `all(ndim == [shape(matA(:,:)), shape(matB(:)), shape(matC(:))])` must hold. ndim, shape(matA), shape(matB), shape(matC) = "\
477 : //getStr([ndim, shape(matA, IK), shape(matB, IK), shape(matC, IK)])) ! fpp
478 : #elif EXP_ENABLED
479 20816 : CHECK_ASSERTION(__LINE__, all(0_IK <= [ndim, roffA, coffA]), \
480 : SK_"@setMatMulAdd(): The condition `all(0 <= [ndim, roffA, coffA])` must hold. ndim, roffA, coffA = "//\
481 : getStr([ndim, roffA, coffA])) ! fpp
482 15612 : CHECK_ASSERTION(__LINE__, all(0_IK /= [incB, incC]), \
483 : SK_"@setMatMulAdd(): The condition `all(0 /= [incB, incC])` must hold. ndim, roffA, coffA, incB, incC = "//\
484 : getStr([incB, incC])) ! fpp
485 41632 : CHECK_ASSERTION(__LINE__, all(ndim <= ubound(matA, kind = IK)), \
486 : SK_"@setMatMulAdd(): The condition `all(ndim + [roffA, coffA] <= shape(matA))` must hold. ndim, ubound(matA) = "//\
487 : getStr([ndim, ubound(matA, kind = IK)])) ! fpp
488 15612 : CHECK_ASSERTION(__LINE__, abs(incB) * (ndim - 1) < size(matB, kind = IK), \
489 : SK_"@setMatMulAdd(): The condition `abs(incB) * (ndim - 1) < size(matB(:))` must hold. incB, size(matB) = "//\
490 : getStr([incB, size(matB, kind = IK)])) ! fpp
491 15612 : CHECK_ASSERTION(__LINE__, abs(incC) * (ndim - 1) < size(matC, kind = IK), \
492 : SK_"@setMatMulAdd(): The condition `abs(incC) * (ndim - 1) < size(matC(:))` must hold. incC, size(matC) = "//\
493 : getStr([incC, size(matC, kind = IK)])) ! fpp
494 : #else
495 : #error "Unrecognized interface."
496 : #endif
497 12524 : if (ndim == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return ! Quick return if possible.
498 : #if BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SUA_ENABLED
499 : call PMV("U", ndim, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
500 : #elif BLAS_ENABLED && DISPATCH_ENABLED && ((spmv_ENABLED && RK_ENABLED) || (hpmv_ENABLED && CK_ENABLED)) && SLA_ENABLED
501 : call PMV("L", ndim, ALPHA, matA(1,1), size(matA, 1, IK), matB(1), incB, BETA, matC(1), incC)
502 : #else
503 4128 : if(incC > 0_IK) then
504 : cstart = 1_IK
505 : else
506 2052 : cstart = 1_IK - (ndim - 1_IK) * incC
507 : end if
508 : ! Start the operations. In this version the elements of matA are
509 : ! accessed sequentially with ONE pass through the triangular part of matA.
510 : ! First form matC := BETA * matC.
511 9856 : if(BETA /= ONE) then
512 3142 : if(incC == 1_IK) then
513 4223 : if (BETA == ZERO) then
514 14260 : do irow = 1_IK, ndim
515 14260 : matC(irow) = ZERO
516 : end do
517 : else
518 13520 : do irow = 1_IK, ndim
519 13520 : matC(irow) = BETA * matC(irow)
520 : end do
521 : end if
522 : else
523 : ic = cstart
524 2641 : if(BETA == ZERO) then
525 8656 : do irow = 1_IK, ndim
526 7355 : matC(ic) = ZERO
527 8656 : ic = ic + incC
528 : end do
529 : else
530 8712 : do irow = 1_IK, ndim
531 7372 : matC(ic) = BETA * matC(ic)
532 8712 : ic = ic + incC
533 : end do
534 : end if
535 : end if
536 : end if
537 9856 : if(ALPHA == ZERO) return
538 : ! set up the start points in matB and matC.
539 3100 : if(incB > 0_IK) then
540 : bstart = 1_IK
541 : else
542 1518 : bstart = 1_IK - (ndim - 1_IK) * incB
543 : end if
544 : #if SUA_ENABLED
545 : ! Form matC when matA is stored in upper triangle.
546 : block
547 : TYPE_KIND :: temp, dumm
548 1550 : if(incB == 1_IK .and. incC == 1_IK) then
549 16682 : do icol = 1_IK, ndim
550 7562 : temp = ALPHA * matB(icol)
551 6520 : dumm = ZERO
552 56360 : do irow = 1_IK, icol - 1_IK
553 42278 : matC(irow) = matC(irow) + temp * matA(irow, icol)
554 56360 : dumm = dumm + CONJG_A(matA(irow, icol)) * matB(irow)
555 : end do
556 16682 : matC(icol) = matC(icol) + temp * GET_RE(matA(icol, icol)) + ALPHA * dumm
557 : end do
558 : else
559 : jb = bstart
560 : jc = cstart
561 8458 : do icol = 1_IK, ndim
562 3850 : temp = ALPHA * matB(jb)
563 3308 : dumm = ZERO
564 : ib = bstart
565 : ic = cstart
566 28808 : do irow = 1_IK, icol - 1_IK
567 21650 : matC(ic) = matC(ic) + temp * matA(irow, icol)
568 21650 : dumm = dumm + CONJG_A(matA(irow, icol)) * matB(ib)
569 21650 : ib = ib + incB
570 28808 : ic = ic + incC
571 : end do
572 7158 : matC(jc) = matC(jc) + temp * GET_RE(matA(icol, icol)) + ALPHA * dumm
573 7158 : jb = jb + incB
574 8458 : jc = jc + incC
575 : end do
576 : end if
577 : end block
578 : #elif SLA_ENABLED
579 : ! Form matC when matA is stored in lower triangle.
580 : block
581 : TYPE_KIND :: temp, dumm
582 1550 : if (incB == 1_IK .and. incC == 1_IK) then
583 16682 : do icol = 1_IK, ndim
584 14082 : temp = ALPHA * matB(icol)
585 6520 : dumm = ZERO
586 14082 : matC(icol) = matC(icol) + temp * GET_RE(matA(icol, icol))
587 56360 : do irow = icol + 1_IK, ndim
588 42278 : matC(irow) = matC(irow) + temp * matA(irow, icol)
589 56360 : dumm = dumm + CONJG_A(matA(irow, icol)) * matB(irow)
590 : end do
591 16682 : matC(icol) = matC(icol) + ALPHA * dumm
592 : end do
593 : else
594 : jb = bstart
595 : jc = cstart
596 8458 : do icol = 1_IK, ndim
597 7158 : temp = ALPHA * matB(jb)
598 3308 : dumm = ZERO
599 7158 : matC(jc) = matC(jc) + temp * GET_RE(matA(icol, icol))
600 : ib = jb
601 : ic = jc
602 28808 : do irow = icol + 1_IK, ndim
603 21650 : ib = ib + incB
604 21650 : ic = ic + incC
605 21650 : matC(ic) = matC(ic) + temp * matA(irow, icol)
606 28808 : dumm = dumm + CONJG_A(matA(irow, icol)) * matB(ib)
607 : end do
608 7158 : matC(jc) = matC(jc) + ALPHA * dumm
609 7158 : jb = jb + incB
610 8458 : jc = jc + incC
611 : end do
612 : end if
613 : end block
614 : #else
615 : #error "Unrecognized interface."
616 : #endif
617 : #endif
618 :
619 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
620 : #elif setMatMulAdd_ENABLED && (symm_ENABLED || hemm_ENABLED)
621 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
622 :
623 : #if !(BLAS_ENABLED && DISPATCH_ENABLED)
624 : integer(IK) :: idum, irow, icol
625 : #endif
626 : ! Define the implied shapes and perform bound checks.
627 : #if ASS_ENABLED
628 : integer(IK) :: nrow, ncol
629 14772 : DECLARE_SET_DEFAULT_ALPHA_BETA
630 14772 : nrow = size(matC, 1, IK)
631 14772 : ncol = size(matC, 2, IK)
632 : ! Check bounds.
633 : #if CNA_ENABLED
634 66474 : CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, ncol]), \
635 : SK_"@setMatMulAdd(): The condition `all(shape(matA) == shape(matC))` must hold. shape(matA), nrow, ncol = "\
636 : //getStr([shape(matA, IK), nrow, ncol])) ! fpp
637 : #else
638 59088 : CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, nrow]), \
639 : SK_"@setMatMulAdd(): The condition `all(shape(matA) == size(matC, 1))` must hold. shape(matA), size(matC, 1) = "\
640 : //getStr([shape(matA, IK), nrow])) ! fpp
641 : #endif
642 : #if CNB_ENABLED
643 66474 : CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [nrow, ncol]), \
644 : SK_"@setMatMulAdd(): The condition `all(shape(matB) == shape(matC))` must hold. shape(matB), shape(matC) = "\
645 : //getStr([shape(matB, IK), nrow, ncol])) ! fpp
646 : #else
647 59088 : CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ncol, ncol]), \
648 : SK_"@setMatMulAdd(): The condition `all(shape(matB) == size(matC, 2))` must hold. shape(matB), size(matC, 2) = "\
649 : //getStr([shape(matB, IK), ncol])) ! fpp
650 : #endif
651 : #elif EXP_ENABLED
652 : ! Ensure offsets and subset bounds are logical.
653 31218 : CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol]), \
654 : SK_"@setMatMulAdd(): The condition `all(0_IK <= shape(matC))` must hold. shape(matC) = "//getStr([nrow, ncol])) ! fpp
655 52030 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
656 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
657 52030 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matB, kind = IK)), \
658 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffB, coffB])` must hold. roffB, coffB = "//getStr(1_IK - lbound(matB))) ! fpp
659 52030 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matC, kind = IK)), \
660 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffC, coffC])` must hold. roffC, coffC = "//getStr(1_IK - lbound(matC))) ! fpp
661 : ! Ensure subset of `matA` does not overflow the matrix bounds.
662 : #if CNA_ENABLED
663 57244 : CHECK_ASSERTION(__LINE__, all([nrow, ncol] <= ubound(matA, kind = IK)), \
664 : SK_"@setMatMulAdd(): The condition `all([nrow + roffA, ncol + coffA] <= shape(matA))` must hold. nrow, ncol, roffA, coffA, shape(matA) = "\
665 : //getStr([nrow, ncol, roffA, coffA, shape(matA, IK)])) ! fpp
666 : #else
667 52020 : CHECK_ASSERTION(__LINE__, all([nrow, nrow] <= ubound(matA, kind = IK)), \
668 : SK_"@setMatMulAdd(): The condition `all([nrow + roffA, nrow + coffA] <= shape(matA))` must hold. nrow, roffA, coffA, shape(matA) = "\
669 : //getStr([nrow, roffA, coffA, shape(matA, IK)])) ! fpp
670 : #endif
671 : ! Ensure subset of `matB` does not overflow the matrix bounds.
672 : #if CNB_ENABLED
673 57222 : CHECK_ASSERTION(__LINE__, all([nrow, ncol] <= ubound(matB, kind = IK)), \
674 : SK_"@setMatMulAdd(): The condition `all([nrow + roffB, ncol + coffB] <= shape(matB))` must hold. nrow, ncol, roffB, coffB, shape(matB) = "\
675 : //getStr([nrow, ncol, roffB, coffB, shape(matB, IK)])) ! fpp
676 : #else
677 52040 : CHECK_ASSERTION(__LINE__, all([ncol, ncol] <= ubound(matB, kind = IK)), \
678 : SK_"@setMatMulAdd(): The condition `all([ncol + roffB, ncol + coffB] <= shape(matB))` must hold. ncol, roffB, coffB, shape(matB) = "\
679 : //getStr([ncol, roffB, coffB, shape(matB, IK)])) ! fpp
680 : #endif
681 : #else
682 : #error "Unrecognized interface."
683 : #endif
684 : ! Quick return if possible.
685 25178 : if (nrow == 0_IK .or. ncol == 0_IK .or. (ALPHA == ZERO .and. BETA == ONE)) return
686 18242 : if (ALPHA == ZERO) then
687 3664 : if (BETA == ZERO) then
688 : do concurrent(icol = 1 : ncol, irow = 1 : nrow)
689 59824 : matC(irow, icol) = ZERO
690 : end do
691 : else
692 : do concurrent(icol = 1 : ncol, irow = 1 : nrow)
693 72016 : matC(irow, icol) = BETA * matC(irow, icol)
694 : end do
695 : end if
696 3664 : return
697 : end if
698 : ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA(upper-triangular) * matB + beta * matC.
699 : #if SUA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
700 : call SHMM("L", "U", nrow, ncol, ALPHA, matA, size(matA, 1, IK), matB, size(matB, 1, IK), BETA, matC, size(matC, 1, IK))
701 : #elif SUA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED
702 : block
703 : TYPE_KIND :: temp, dumm
704 24088 : do icol = 1, ncol
705 138768 : do irow = 1, nrow
706 61640 : temp = ALPHA * matB(irow, icol)
707 53040 : dumm = ZERO
708 469416 : do idum = 1, irow - 1
709 354736 : matC(idum, icol) = matC(idum, icol) + temp * matA(idum, irow)
710 469416 : dumm = dumm + matB(idum, icol) * CONJG_A(matA(idum, irow))
711 : end do
712 135124 : if (BETA == ZERO) then
713 34764 : matC(irow, icol) = temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
714 : else
715 79916 : matC(irow, icol) = BETA * matC(irow, icol) + temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
716 : end if
717 : end do
718 : end do
719 : end block
720 : ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA(lower-triangular) * matB + beta * matC.
721 : #elif SLA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
722 : call SHMM("L", "L", nrow, ncol, ALPHA, matA, size(matA, 1, IK), matB, size(matB, 1, IK), BETA, matC, size(matC, 1, IK))
723 : #elif SLA_ENABLED && SFB_ENABLED && (CSA_ENABLED || CHA_ENABLED) && CNB_ENABLED
724 : block
725 : TYPE_KIND :: temp, dumm
726 24092 : do icol = 1, ncol
727 138768 : do irow = nrow, 1, -1
728 114676 : temp = ALPHA * matB(irow, icol)
729 53040 : dumm = ZERO
730 469368 : do idum = irow + 1, nrow
731 354692 : matC(idum, icol) = matC(idum, icol) + temp * matA(idum, irow)
732 469368 : dumm = dumm + matB(idum, icol) * CONJG_A(matA(idum, irow))
733 : end do
734 135124 : if (BETA == ZERO) then
735 34764 : matC(irow, icol) = temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
736 : else
737 79912 : matC(irow, icol) = BETA * matC(irow, icol) + temp * GET_RE(matA(irow, irow)) + ALPHA * dumm
738 : end if
739 : end do
740 : end do
741 : end block
742 : ! BLAS 3 SYMM/HEMM: Form matC := alpha * matA * matB(triangular) + beta * matC.
743 : #elif SFA_ENABLED && SUB_ENABLED && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
744 : call SHMM("R", "U", ncol, nrow, ALPHA, matB, size(matB, 1, IK), matA, size(matA, 1, IK), BETA, matC, size(matC, 1, IK))
745 : #elif SFA_ENABLED && SLB_ENABLED && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
746 : call SHMM("R", "L", ncol, nrow, ALPHA, matB, size(matB, 1, IK), matA, size(matA, 1, IK), BETA, matC, size(matC, 1, IK))
747 : #elif SFA_ENABLED && (SUB_ENABLED || SLB_ENABLED) && CNA_ENABLED && (CSB_ENABLED || CHB_ENABLED)
748 : block
749 : TYPE_KIND :: temp
750 48180 : do icol = 1, ncol
751 40890 : temp = ALPHA * GET_RE(matB(icol, icol))
752 40890 : if (BETA == ZERO) then
753 82076 : do irow = 1, nrow
754 82076 : matC(irow, icol) = temp * matA(irow, icol)
755 : end do
756 : else
757 188136 : do irow = 1, nrow
758 188136 : matC(irow, icol) = BETA * matC(irow, icol) + temp * matA(irow, icol)
759 : end do
760 : end if
761 166440 : do idum = 1, icol - 1
762 : #if SUB_ENABLED
763 62775 : temp = ALPHA * matB(idum, icol)
764 : #elif SLB_ENABLED
765 62775 : temp = ALPHA * CONJG_B(matB(icol, idum))
766 : #else
767 : #error "Unrecognized interface."
768 : #endif
769 859350 : do irow = 1, nrow
770 818460 : matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
771 : end do
772 : end do
773 173730 : do idum = icol + 1, ncol
774 : #if SUB_ENABLED
775 62775 : temp = ALPHA * CONJG_B(matB(icol, idum))
776 : #elif SLB_ENABLED
777 62775 : temp = ALPHA * matB(idum, icol)
778 : #else
779 : #error "Unrecognized interface."
780 : #endif
781 859350 : do irow = 1, nrow
782 818460 : matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
783 : end do
784 : end do
785 : end do
786 : end block
787 : #else
788 : #error "Unrecognized interface."
789 : #endif
790 :
791 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
792 : #elif setMatMulAdd_ENABLED && gemm_ENABLED
793 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
794 :
795 : integer(IK) :: irow, icol
796 : #if !(BLAS_ENABLED && DISPATCH_ENABLED)
797 : integer(IK) :: idum
798 : #endif
799 : ! Define the implied shapes and perform bound checks.
800 : #if ASS_ENABLED
801 : integer(IK) :: nrow, ncol, ndum
802 16150 : DECLARE_SET_DEFAULT_ALPHA_BETA
803 16150 : nrow = size(matC, 1, IK)
804 16150 : ncol = size(matC, 2, IK)
805 : ! Set the length of the dummy axis of `matA` but only if it is a general asymmetric matrix.
806 : #if SFA_ENABLED && TNA_ENABLED
807 5384 : ndum = size(matA, 2, IK)
808 : #elif SFA_ENABLED && (TSA_ENABLED || THA_ENABLED)
809 10766 : ndum = size(matA, 1, IK)
810 : #else
811 : #error "Unrecognized interface."
812 : #endif
813 : ! Check bounds.
814 : #if TNA_ENABLED
815 48456 : CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [nrow, ndum]), \
816 : SK_"@setMatMulAdd(): The condition `all(shape(matA) == [nrow, ndum])` must hold. shape(matA), nrow, ndum = "\
817 : //getStr([shape(matA, IK), nrow, ndum])) ! fpp
818 : #else
819 96894 : CHECK_ASSERTION(__LINE__, all(shape(matA, IK) == [ndum, nrow]), \
820 : SK_"@setMatMulAdd(): The condition `all(shape(matA) == [ndum, nrow])` must hold. shape(matA), ndum, nrow = "\
821 : //getStr([shape(matA, IK), ndum, nrow])) ! fpp
822 : #endif
823 : #if TNB_ENABLED
824 48474 : CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ndum, ncol]), \
825 : SK_"@setMatMulAdd(): The condition `all(shape(matB) == [ndum, ncol])` must hold. shape(matB), ndum, ncol = "\
826 : //getStr([shape(matB, IK), ndum, ncol])) ! fpp
827 : #else
828 96876 : CHECK_ASSERTION(__LINE__, all(shape(matB, IK) == [ncol, ndum]), \
829 : SK_"@setMatMulAdd(): The condition `all(shape(matB) == [ncol, ndum])` must hold. shape(matB), ncol, ndum = "\
830 : //getStr([shape(matB, IK), ncol, ndum])) ! fpp
831 : #endif
832 : #elif EXP_ENABLED
833 : ! Ensure offsets and subset bounds are logical.
834 50376 : CHECK_ASSERTION(__LINE__, all(0_IK <= [nrow, ncol, ndum]), \
835 : SK_"@setMatMulAdd(): The condition `all(0_IK <= [nrow, ncol, ndum])` must hold. nrow, ncol, ndum = "//getStr([nrow, ncol, ndum])) ! fpp
836 62970 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matA, kind = IK)), \
837 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffA, coffA])` must hold. roffA, coffA = "//getStr(1_IK - lbound(matA))) ! fpp
838 62970 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matB, kind = IK)), \
839 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffB, coffB])` must hold. roffB, coffB = "//getStr(1_IK - lbound(matB))) ! fpp
840 62970 : CHECK_ASSERTION(__LINE__, all(0_IK <= 1_IK - lbound(matC, kind = IK)), \
841 : SK_"@setMatMulAdd(): The condition `all(0 <= [roffC, coffC])` must hold. roffC, coffC = "//getStr(1_IK - lbound(matC))) ! fpp
842 : ! Ensure subset of `matA` does not overflow the matrix bounds.
843 : #if TNA_ENABLED
844 47839 : CHECK_ASSERTION(__LINE__, all([roffA + nrow, coffA + ndum] <= shape(matA, kind = IK)), \
845 : SK_"@setMatMulAdd(): The condition `all([roffA + nrow, coffA + ndum] <= shape(matA))` must hold. roffA, coffA, nrow, ndum, shape(matA) = "\
846 : //getStr([roffA, coffA, nrow, ndum, shape(matA, IK)])) ! fpp
847 : #elif TSA_ENABLED || THA_ENABLED
848 90695 : CHECK_ASSERTION(__LINE__, all([roffA + ndum, coffA + nrow] <= shape(matA, IK)), \
849 : SK_"@setMatMulAdd(): The condition `all([roffA + ndum, coffA + nrow] <= shape(matA))` must hold. roffA, coffA, nrow, ndum, shape(matA) = "\
850 : //getStr([roffA, coffA, nrow, ndum, shape(matA, IK)])) ! fpp
851 : #else
852 : #error "Unrecognized interface."
853 : #endif
854 : ! Ensure subset of `matB` does not overflow the matrix bounds.
855 : #if TNB_ENABLED
856 47828 : CHECK_ASSERTION(__LINE__, all([roffB + ndum, coffB + ncol] <= shape(matB, IK)), \
857 : SK_"@setMatMulAdd(): The condition `all([roffB + ndum, coffB + ncol] <= shape(matB))` must hold. roffB, coffB, ncol, ndum, shape(matB) = "\
858 : //getStr([roffB, coffB, ncol, ndum, shape(matB, IK)])) ! fpp
859 : #elif TSB_ENABLED || THB_ENABLED
860 90706 : CHECK_ASSERTION(__LINE__, all([roffB + ncol, coffB + ndum] <= shape(matB, IK)), \
861 : SK_"@setMatMulAdd(): The condition `all([roffB + ncol, coffB + ndum] <= shape(matB))` must hold. roffB, coffB, ncol, ndum, shape(matB) = "\
862 : //getStr([roffB, coffB, ncol, ndum, shape(matB, IK)])) ! fpp
863 : #else
864 : #error "Unrecognized interface."
865 : #endif
866 : #else
867 : #error "Unrecognized interface."
868 : #endif
869 : ! Quick return if possible.
870 28744 : if (nrow == 0_IK .or. ncol == 0_IK .or. ((ALPHA == ZERO .or. ndum == 0_IK) .and. BETA == ONE)) return
871 19751 : if (ALPHA == ZERO .or. ndum == 0_IK) then ! The condition `.or. ndum == 0_IK` is extra to BLAS here, to bypass the implicit-interface limitation of BLAS.
872 5148 : if (BETA == ZERO) then
873 : do concurrent(icol = 1 : ncol, irow = 1 : nrow)
874 87084 : matC(irow, icol) = ZERO
875 : end do
876 : else
877 : do concurrent(icol = 1 : ncol, irow = 1 : nrow)
878 103707 : matC(irow, icol) = BETA * matC(irow, icol)
879 : end do
880 : end if
881 5148 : return
882 : end if
883 : ! BLAS 3 GEMM: Form matC := alpha * matA * matB + beta * matC.
884 : ! \todo There is a lot of room for improvement here, particularly when order of matC is large.
885 : ! For example, the BETA value check should be completely taken out of the icol loop.
886 : #if SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && TNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
887 : call GEMM(TMA, TMB, ALPHA, BETA)
888 : #elif SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && TNB_ENABLED
889 : block
890 : TYPE_KIND :: temp
891 : #if 0
892 : ! assume BETA /= ZERO .and. BETA /= ONE
893 : do icol = 1, ncol
894 : idum = 1
895 : temp = ALPHA * matB(idum, icol)
896 : do irow = 1, nrow
897 : matC(irow, icol) = BETA * matC(irow, icol) + temp * matA(irow, idum)
898 : end do
899 : do idum = 2, ndum
900 : temp = ALPHA * matB(idum, icol)
901 : do irow = 1, nrow
902 : matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
903 : end do
904 : end do
905 : end do
906 : #else
907 10643 : do icol = 1, ncol
908 9031 : if (BETA == ZERO) then
909 18054 : matC(1 : nrow, icol) = ZERO
910 6292 : elseif (BETA /= ONE) then
911 18637 : do irow = 1, nrow
912 18637 : matC(irow, icol) = BETA * matC(irow, icol)
913 : end do
914 : end if
915 61386 : do idum = 1, ndum
916 50743 : temp = ALPHA * matB(idum, icol)
917 340806 : do irow = 1, nrow
918 331775 : matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
919 : end do
920 : end do
921 : end do
922 : end block
923 : #endif
924 : ! BLAS 3 GEMM: Form matC := alpha * transpose(matA) * matB + beta * matC
925 : #elif SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && TNB_ENABLED && BLAS_ENABLED && DISPATCH_ENABLED
926 : call GEMM(TMA, TMB, ALPHA, BETA)
927 : #elif SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && TNB_ENABLED
928 : block
929 : TYPE_KIND :: temp
930 21437 : do icol = 1, ncol
931 121671 : do irow = 1, nrow
932 46657 : temp = ZERO
933 662300 : do idum = 1, ndum
934 662300 : temp = temp + CONJG_A(matA(idum, irow)) * matB(idum, icol)
935 : end do
936 118390 : if (BETA == ZERO) then
937 30626 : matC(irow, icol) = ALPHA * temp
938 : else
939 69608 : matC(irow, icol) = ALPHA * temp + BETA * matC(irow, icol)
940 : end if
941 : end do
942 : end do
943 : end block
944 : ! BLAS 3 GEMM: Form matC := alpha * matA * transpose(matB) + beta * matC
945 : #elif SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && (TSB_ENABLED || THB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
946 : call GEMM(TMA, TMB, ALPHA, BETA)
947 : #elif SFA_ENABLED && SFB_ENABLED && TNA_ENABLED && (TSB_ENABLED || THB_ENABLED)
948 : block
949 : TYPE_KIND :: temp
950 21461 : do icol = 1, ncol
951 18179 : if (BETA == ZERO) then
952 36108 : matC(1 : nrow, icol) = ZERO
953 12702 : else if (BETA /= ONE) then
954 37190 : do irow = 1, nrow
955 37190 : matC(irow, icol) = BETA * matC(irow, icol)
956 : end do
957 : end if
958 123207 : do idum = 1, ndum
959 101746 : temp = ALPHA * CONJG_B(matB(icol, idum))
960 682021 : do irow = 1, nrow
961 663842 : matC(irow, icol) = matC(irow, icol) + temp * matA(irow, idum)
962 : end do
963 : end do
964 : end do
965 : end block
966 : ! BLAS 3 GEMM: Form matC := alpha * transpose(matA) * transpose(matB) + beta * matC
967 : #elif SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && (TSB_ENABLED || THB_ENABLED) && BLAS_ENABLED && DISPATCH_ENABLED
968 : call GEMM(TMA, TMB, ALPHA, BETA)
969 : #elif SFA_ENABLED && SFB_ENABLED && (TSA_ENABLED || THA_ENABLED) && (TSB_ENABLED || THB_ENABLED)
970 : block
971 : TYPE_KIND :: temp
972 42496 : do icol = 1, ncol
973 242464 : do irow = 1, nrow
974 93032 : temp = ZERO
975 1322896 : do idum = 1, ndum
976 1322896 : temp = temp + CONJG_A(matA(idum, irow)) * CONJG_B(matB(icol, idum))
977 : end do
978 236036 : if (BETA == ZERO) then
979 61244 : matC(irow, icol) = ALPHA * temp
980 : else
981 138724 : matC(irow, icol) = ALPHA * temp + BETA * matC(irow, icol)
982 : end if
983 : end do
984 : end do
985 : end block
986 : #else
987 : #error "Unrecognized interface."
988 : #endif
989 :
990 : #else
991 : !%%%%%%%%%%%%%%%%%%%%%%%%
992 : #error "Unrecognized interface."
993 : !%%%%%%%%%%%%%%%%%%%%%%%%
994 : #endif
995 : #undef DECLARE_SET_DEFAULT_ALPHA_BETA
996 : #undef TYPE_KIND
997 : #undef DECLARE
998 : #undef CONJG_A
999 : #undef CONJG_B
1000 : #undef GET_RE
1001 : #undef ALPHA
1002 : #undef BETA
1003 : #undef ndum
1004 : #undef SHMM
1005 : #undef GEMM
1006 : #undef LENB
1007 : #undef LENC
1008 : #undef TMA
1009 : #undef TMB
1010 : #undef PMV
|