Line data Source code
1 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3 : !!!! !!!!
4 : !!!! ParaMonte: Parallel Monte Carlo and Machine Learning Library. !!!!
5 : !!!! !!!!
6 : !!!! Copyright (C) 2012-present, The Computational Data Science Lab !!!!
7 : !!!! !!!!
8 : !!!! This file is part of the ParaMonte library. !!!!
9 : !!!! !!!!
10 : !!!! LICENSE !!!!
11 : !!!! !!!!
12 : !!!! https://github.com/cdslaborg/paramonte/blob/main/LICENSE.md !!!!
13 : !!!! !!!!
14 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
15 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
16 :
17 : !> \brief
18 : !> This include file contains procedure implementations of [pm_optimization](@ref pm_optimization).
19 : !>
20 : !> \finmain
21 : !>
22 : !> \author
23 : !> \AmirShahmoradi, Tuesday March 7, 2017, 3:50 AM, Institute for Computational Engineering and Sciences (ICES), The University of Texas at Austin
24 :
25 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
26 :
27 : !%%%%%%%%%%%%%%%%%%%%%%%%%
28 : #if setForkJoinScaling_ENABLED
29 : !%%%%%%%%%%%%%%%%%%%%%%%%%
30 :
31 : !> \brief
32 : !> The scalar constant of type `integer` of default kind, representing the maximum number of
33 : !> Coarray images, or MPI processes, or OpenMP threads that the procedures of this module are guaranteed to handle.<br>
34 : !> This constant is particularly relevant to the algorithm of [setForkJoinScaling](@ref pm_parallelism::setForkJoinScaling).<br>
35 : !>
36 : !> \details
37 : !> The maximum value is set to ensure the outputs of [setForkJoinScaling](@ref pm_parallelism::setForkJoinScaling) do not overflow the memory.<br>
38 : !>
39 : !> \interface{MAX_NUM_IMAGE}
40 : !> \code{.F90}
41 : !>
42 : !> use pm_parallelism, only: MAX_NUM_IMAGE
43 : !>
44 : !> print *, MAX_NUM_IMAGE
45 : !>
46 : !> \endcode
47 : !>
48 : !> \finmain{MAX_NUM_IMAGE}
49 : !>
50 : !> \author
51 : !> \AmirShahmoradi, Tuesday March 7, 2017, 3:50 AM, Institute for Computational Engineering and Sciences (ICES), The University of Texas at Austin
52 : !integer(IK) , parameter :: MAX_NUM_IMAGE = (huge(0_IK) - mod(huge(0_IK), 2_IK)) / 2_IK ! maximum number of images supported by this algorithm.
53 : integer(IK) , parameter :: POWER_BASE = 2_IK
54 : integer(IK) , parameter :: MAX_NUM_IMAGE = (huge(0_IK) - mod(huge(0_IK), POWER_BASE)) / POWER_BASE
55 : integer(IK) , parameter :: MID_NUM_IMAGE = 2**13_IK ! The number above which we switch to log-linear range.
56 : character(*, SK), parameter :: PROCEDURE_NAME = MODULE_NAME//SK_"@setForkJoinScaling()"
57 : real(RKC) :: contributionImageFirst, totalRunTime, parSecTimeEffective, comSecTimeTotal
58 : integer(IK) :: lenScaling, iell
59 : logical(LK) :: maxvalFound
60 :
61 28 : CHECK_ASSERTION(__LINE__, 0 <= seqSecTime, SK_"@setForkJoinScaling(): The condition `0 <= seqSecTime` must hold. seqSecTime = "//getStr(seqSecTime))
62 28 : CHECK_ASSERTION(__LINE__, 0 <= parSecTime, SK_"@setForkJoinScaling(): The condition `0 <= parSecTime` must hold. parSecTime = "//getStr(parSecTime))
63 28 : CHECK_ASSERTION(__LINE__, 0 <= comSecTime, SK_"@setForkJoinScaling(): The condition `0 <= comSecTime` must hold. comSecTime = "//getStr(comSecTime))
64 28 : CHECK_ASSERTION(__LINE__, 0 <= conProb .and. conProb <= 1, SK_"@setForkJoinScaling(): The condition `0. <= conProb .and. conProb <= 1.` must hold. conProb = "//getStr(conProb))
65 :
66 28 : totalRunTime = seqSecTime + parSecTime ! serial + parallel section runtime of the code per function call.
67 28 : if (0._RKC < totalRunTime) then
68 :
69 28 : if (present(scalingMinLen)) then
70 27 : CHECK_ASSERTION(__LINE__, 0 <= scalingMinLen, SK_"@setForkJoinScaling(): The condition `0 <= scalingMinLen` must hold. scalingMinLen = "//getStr(scalingMinLen))
71 27 : lenScaling = scalingMinLen
72 : else
73 1 : lenScaling = ceiling(2._RKC / max(conProb, 0.001_RKC), IK)
74 : end if
75 28 : call setResized(scaling, lenScaling)
76 28 : call setResized(numproc, lenScaling)
77 :
78 : iell = 1_IK
79 28 : numproc(iell) = 1_IK
80 28 : scalingMaxLoc = iell
81 28 : scalingMaxVal = 1._RKC
82 28 : scaling(iell) = scalingMaxVal
83 : maxvalFound = .false._LK
84 : loopScaling: do
85 1000 : if (numproc(iell) < MAX_NUM_IMAGE) then
86 972 : if (iell < lenScaling) then
87 914 : iell = iell + 1_IK
88 914 : if (0._RKC < comSecTime .and. numproc(iell - 1) < MID_NUM_IMAGE) then
89 134 : numproc(iell) = iell
90 : else
91 : ! Here, the scaling function is monotonically increasing.
92 : ! We cannot afford computing the scaling for each and every process count.
93 : ! As a compromise, we do it for a log-linear range.
94 780 : numproc(iell) = numproc(iell - 1) * POWER_BASE
95 : end if
96 : ! compute the fraction of work done by the first image.
97 914 : if (0._RKC < conProb) then
98 524 : contributionImageFirst = exp(getGeomCyclicLogPMF(stepSuccess = 1_IK, probSuccess = conProb, period = numproc(iell)))
99 : else ! if (conProb == 0._RKC) then
100 390 : contributionImageFirst = 1._RKC / numproc(iell)
101 : end if
102 : ! effective runtime of the parallel-section of the code, when executed in parallel on iell processes.
103 914 : parSecTimeEffective = parSecTime * contributionImageFirst
104 : ! Assume communication time grows linearly with the number of processes.
105 914 : comSecTimeTotal = (numproc(iell) - 1_IK) * comSecTime
106 914 : scaling(iell) = totalRunTime / (seqSecTime + parSecTimeEffective + comSecTimeTotal)
107 914 : if (scalingMaxVal < scaling(iell)) then
108 502 : scalingMaxVal = scaling(iell)
109 502 : scalingMaxLoc = iell
110 : else
111 : maxvalFound = .true._LK
112 : end if
113 58 : elseif (maxvalFound .and. scalingMaxLoc * 3 <= iell) then
114 : exit loopScaling
115 : else
116 56 : call setResized(numproc)
117 56 : call setResized(scaling)
118 56 : lenScaling = size(scaling, 1, IK)
119 : end if
120 : else
121 : exit loopScaling !error stop PROCEDURE_NAME//SK_": Failed to find the number of processes with which the maximum Fork-Join scaling occurs." ! LCOV_EXCL_LINE
122 : end if
123 : end do loopScaling
124 1940 : numproc = numproc(1 : iell)
125 1940 : scaling = scaling(1 : iell)
126 :
127 : else
128 :
129 : ! There is no code to execute, possibly only parallel overhead.
130 0 : numproc = [1_IK]
131 0 : scalingMaxLoc = 1_IK
132 0 : scalingMaxVal = 0_IK
133 0 : scaling = [scalingMaxVal]
134 :
135 : end if
136 :
137 : #else
138 : !%%%%%%%%%%%%%%%%%%%%%%%%
139 : #error "Unrecognized interface."
140 : !%%%%%%%%%%%%%%%%%%%%%%%%
141 : #endif
|