Line data Source code
1 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3 : !!!!
4 : !!!! MIT License
5 : !!!!
6 : !!!! ParaMonte: plain powerful parallel Monte Carlo library.
7 : !!!!
8 : !!!! Copyright (C) 2012-present, The Computational Data Science Lab
9 : !!!!
10 : !!!! This file is part of the ParaMonte library.
11 : !!!!
12 : !!!! Permission is hereby granted, free of charge, to any person obtaining a
13 : !!!! copy of this software and associated documentation files (the "Software"),
14 : !!!! to deal in the Software without restriction, including without limitation
15 : !!!! the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 : !!!! and/or sell copies of the Software, and to permit persons to whom the
17 : !!!! Software is furnished to do so, subject to the following conditions:
18 : !!!!
19 : !!!! The above copyright notice and this permission notice shall be
20 : !!!! included in all copies or substantial portions of the Software.
21 : !!!!
22 : !!!! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 : !!!! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 : !!!! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 : !!!! IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26 : !!!! DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27 : !!!! OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
28 : !!!! OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 : !!!!
30 : !!!! ACKNOWLEDGMENT
31 : !!!!
32 : !!!! ParaMonte is an honor-ware and its currency is acknowledgment and citations.
33 : !!!! As per the ParaMonte library license agreement terms, if you use any parts of
34 : !!!! this library for any purposes, kindly acknowledge the use of ParaMonte in your
35 : !!!! work (education/research/industry/development/...) by citing the ParaMonte
36 : !!!! library as described on this page:
37 : !!!!
38 : !!!! https://github.com/cdslaborg/paramonte/blob/main/ACKNOWLEDGMENT.md
39 : !!!!
40 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
41 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
42 :
43 : !> \brief This module contains procedures for computing the parallel performance of the parallel algorithms.
44 : !> \author Amir Shahmoradi
45 :
46 : module Parallelism_mod
47 :
48 : use Optimization_mod, only: PowellMinimum_type
49 : use Constants_mod, only: RK, IK
50 : use Err_mod, only: Err_type
51 :
52 : implicit none
53 :
54 : character(*), parameter :: MODULE_NAME = "@Parallelism_mod"
55 :
56 : !> The [Image_type](@ref image_type) class.
57 : type :: Image_type
58 : integer(IK) :: id !< The ID of the runtime parallel process: 1, 2, 3, ...
59 : integer(IK) :: count !< The total number of runtime parallel processes available.
60 : logical :: isFirst = .false. !< A logical flag indicating whether the current process is ID #1.
61 : logical :: isNotFirst = .false. !< A logical flag indicating whether the current process is NOT ID #1.
62 : logical :: isLeader = .false. !< A logical flag indicating whether the current process is a leader. This must be defined by the user.
63 : logical :: isRooter = .false. !< A logical flag indicating whether the current process is a follower. This must be defined by the user.
64 : character(:), allocatable :: name !< The name of the current process as a string with the format `"@process(id)"`.
65 : contains
66 : procedure, nopass :: sync => syncImages
67 : procedure, pass :: query => queryImage
68 : procedure, nopass :: finalize => finalizeImages
69 : end type Image_type
70 :
71 : type, private :: Maximum_type
72 : real(RK) :: value !< The maximum speedup attained (or attainable).
73 : integer(IK) :: nproc !< The required number of processes for maximum speedup.
74 : end type Maximum_type
75 :
76 : type, private :: Speedup_type
77 : integer(IK) :: count !< The size of the `Scaling` vector.
78 : real(RK) :: current !< The speedup given the current available number of processes.
79 : type(Maximum_type) :: Maximum !< An object of type [Maximum_type](@ref maximum_type), containing the predicted maximum speedup and process count.
80 : real(RK) , allocatable :: Scaling(:) !< A real vector of length `Speedup_type%count` containing the predicted speedup for a range process counts.
81 : end type Speedup_type
82 :
83 : type, private :: UniqueProcess_type
84 : integer(IK) :: count !< The sizes of the two vector components `Identity` and `Frequency` of `UniqueProcess_type`.
85 : integer(IK) , allocatable :: Identity(:) !< A vector of size `UniqueProcess_type%count` containing the unique IDs of processes, i.e., the ranks of processes, starting from 1.
86 : integer(IK) , allocatable :: Frequency(:) !< The frequency with which the process IDs have contributed to the simulation at hand.
87 : end type UniqueProcess_type
88 :
89 : type, private, extends(UniqueProcess_type) :: Contribution_type
90 : real(RK) , allocatable :: LogFrequency(:) !< The natural logarithm of the `Frequency` vector component of the type [UniqueProcess_type](@ref uniqueprocess_type).
91 : end type Contribution_type
92 :
93 : type, private :: SuccessProb_type
94 : real(RK) :: current !< The probability of success in the Bernoulli trial. For example, the sampling efficiency of the ParaDRAM sampler.
95 : real(RK) :: effective !< The computed effective probability of success inferred from fitting the Cyclic Geometric distribution.
96 : type(PowellMinimum_type) :: PowellMinimum !< An object of class [PowellMinimum_type](@ref optimization_mod::powellminimum_type) containing
97 : !< information about the Cyclic Geometric fit to process contribution data.
98 : end type SuccessProb_type
99 :
100 : !> The `ForkJoin_type` class.
101 : type :: ForkJoin_type
102 : type(UniqueProcess_type) :: UniqueProcess
103 : type(Contribution_type) :: Contribution !< Contains information similar to `UniqueProcess`, but including all processes, even zero contributors.
104 : type(SuccessProb_type) :: SuccessProb
105 : type(Speedup_type) :: Speedup
106 : type(Err_type) :: Err
107 : contains
108 : procedure , nopass :: getForkJoinSpeedup
109 : end type ForkJoin_type
110 :
111 : interface ForkJoin_type
112 : module procedure :: constructForkJoin
113 : end interface ForkJoin_type
114 :
115 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 :
117 : contains
118 :
119 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
120 :
121 : !> \brief
122 : !> Return the statistics of the parallel processors available, depending on the type of parallelism requested.
123 : !> This is a dynamic member of the [Image_type](@ref image_type) class.
124 : !>
125 : !> @param[inout] Image : An object of class [Image_type](@ref image_type).
126 : !> On output, all properties of `Image` will be reset,
127 : !> except the attributes `isLeader` and `isRooter`.
128 : !>
129 : !> \warning
130 : !> This routine must not contain any synchronization statements under any circumstances.
131 1056 : subroutine queryImage(Image)
132 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
133 : !DEC$ ATTRIBUTES DLLEXPORT :: queryImage
134 : #endif
135 : use Constants_mod, only: RK, IK
136 : use String_mod, only: num2str
137 : implicit none
138 : class(Image_type), intent(inout) :: Image
139 :
140 : ! setup general processor / coarray image variables
141 :
142 : #if defined CAF_ENABLED
143 : Image%id = this_image()
144 : Image%count = num_images()
145 : #elif defined MPI_ENABLED
146 : block
147 : use mpi
148 : integer(IK) :: ierrMPI
149 : logical :: isInitialized
150 1056 : call mpi_initialized( isInitialized, ierrMPI )
151 1056 : if (.not. isInitialized) call mpi_init(ierrMPI)
152 1056 : call mpi_comm_rank(mpi_comm_world, Image%id, ierrMPI)
153 1056 : call mpi_comm_size(mpi_comm_world, Image%count, ierrMPI)
154 1056 : Image%id = Image%id + 1_IK ! make the ranks consistent with Fortran coarray indexing conventions
155 : end block
156 : #else
157 : Image%id = 1_IK
158 : Image%count = 1_IK
159 : #endif
160 1056 : Image%name = "@process(" // num2str(Image%id) // ")"
161 1056 : Image%isFirst = Image%id==1_IK
162 1056 : Image%isNotFirst = Image%id/=1_IK
163 : !Image%isLeader = .false. ! ATTN: this is to be set by the user at runtime, depending on the type of parallelism.
164 : !Image%isRooter = .false. ! ATTN: this is to be set by the user at runtime, depending on the type of parallelism.
165 :
166 1056 : end subroutine queryImage
167 :
168 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
169 :
170 : !> \brief
171 : !> Synchronize all existing parallel images and return nothing.
172 : !> This is a static member of the [Image_type](@ref image_type) class.
173 : !>
174 : !> \warning
175 : !> This routine global Coarray and MPI synchronization barriers and
176 : !> therefore, must be called by all processes in the current simulation.
177 10488 : subroutine syncImages()
178 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
179 : !DEC$ ATTRIBUTES DLLEXPORT :: syncImages
180 : #endif
181 : implicit none
182 : #if defined MPI_ENABLED
183 : block
184 : use mpi
185 : integer(IK) :: ierrMPI
186 : !logical :: isFinalized
187 : !call mpi_finalized( isFinalized, ierrMPI )
188 : !if (.not. isFinalized) then
189 10488 : call mpi_barrier(mpi_comm_world,ierrMPI)
190 : !end if
191 : end block
192 : #elif defined CAF_ENABLED
193 : sync all
194 : #endif
195 1056 : end subroutine syncImages
196 :
197 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
198 :
199 : !> \brief
200 : !> Finalize the current parallel simulation and return nothing.
201 : !> This is a static member of the [Image_type](@ref image_type) class.
202 : !>
203 : !> \warning
204 : !> MPI communications will be shut down upon calling this routine and further interprocess communications will be impossible.
205 : subroutine finalizeImages() ! LCOV_EXCL_LINE
206 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
207 : !DEC$ ATTRIBUTES DLLEXPORT :: finalizeImages
208 : #endif
209 : #if defined CAF_ENABLED
210 : sync all
211 : #elif defined MPI_ENABLED
212 10488 : use mpi
213 : implicit none
214 : integer(IK) :: ierrMPI
215 : logical :: isFinalized
216 3 : call mpi_finalized( isFinalized, ierrMPI )
217 3 : if (.not. isFinalized) then
218 3 : call mpi_barrier(mpi_comm_world,ierrMPI)
219 3 : call mpi_finalize(ierrMPI)
220 : end if
221 : #endif
222 3 : end subroutine finalizeImages
223 :
224 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
225 :
226 : !> \brief
227 : !> This is the constructor of the [ForkJoin_type](@ref forkjoin_type) class.
228 : !> Return the predicted speedup of the parallel simulation given the input characteristics and timing information of the simulation.
229 : !>
230 : !> @param[in] processCount : The number of processes in the simulation.
231 : !> @param[in] lenProcessID : The length of `ProcessID` vector.
232 : !> @param[in] ProcessID : The vector of process IDs.
233 : !> @param[in] successProb : The success probability (the effective acceptance rate per objective function call).
234 : !> @param[in] seqSecTime : The timing of the sequential sections of the code.
235 : !> @param[in] parSecTime : The timing of the parallel sections of the code.
236 : !> @param[in] comSecTime : The timing of the communication sections of the code.
237 : !>
238 : !> \return
239 : !> `ForkJoin` : An object of class [ForkJoin_type](@ref forkjoin_type) containing the parallelization speedup.
240 493 : function constructForkJoin(processCount, lenProcessID, ProcessID, successProb, seqSecTime, parSecTime, comSecTime) result(ForkJoin) ! nonpure
241 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
242 : !DEC$ ATTRIBUTES DLLEXPORT :: constructForkJoin
243 : #endif
244 : use GeoCyclicFit_mod, only: fitGeoCyclicLogPDF ! LCOV_EXCL_LINE
245 : use Constants_mod, only: IK, RK, SQRT_EPS_RK, NEGINF_RK
246 : use String_mod, only: num2str
247 : use Misc_mod, only: findUnique
248 : use Sort_mod, only: indexArray
249 :
250 : implicit none
251 :
252 : integer(IK) , intent(in) :: processCount, lenProcessID, ProcessID(lenProcessID)
253 : real(RK) , intent(in) :: successProb, seqSecTime, parSecTime, comSecTime
254 : type(ForkJoin_type) :: ForkJoin
255 :
256 : character(*), parameter :: PROCEDURE_NAME = MODULE_NAME // "@constructForkJoin()"
257 : real(RK) , parameter :: LOG_HALF = -0.693147180559945_RK
258 :
259 : integer(IK) , allocatable :: Indx(:)
260 : integer(IK) :: i
261 :
262 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
263 : ! check for invalid input
264 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
265 :
266 493 : ForkJoin%Err%occurred = .false.
267 :
268 493 : if (successProb<-SQRT_EPS_RK .or. successProb>1._RK+SQRT_EPS_RK) then
269 6 : ForkJoin%Err%occurred = .true.
270 6 : ForkJoin%Err%msg = PROCEDURE_NAME // ": successProb must be a number between zero and one. The input value is: " // num2str(successProb)
271 6 : return
272 : end if
273 :
274 487 : if (processCount<1_IK) then
275 3 : ForkJoin%Err%occurred = .true.
276 3 : ForkJoin%Err%msg = PROCEDURE_NAME // ": processCount cannot be less than one. The input value is: " // num2str(processCount)
277 3 : return
278 : end if
279 :
280 484 : if (processCount==1_IK .or. successProb <= SQRT_EPS_RK .or. successProb >= 1._RK+SQRT_EPS_RK) then
281 269 : ForkJoin%Speedup%Maximum%value = 1._RK
282 269 : ForkJoin%Speedup%Maximum%nproc = 1_IK
283 269 : ForkJoin%Speedup%current = 1._RK
284 269 : ForkJoin%Speedup%count = 1_IK
285 538 : ForkJoin%Speedup%Scaling = [1._RK]
286 269 : ForkJoin%Contribution%count = processCount
287 2393 : ForkJoin%Contribution%Identity = [( i, i = 1, processCount )]
288 977 : allocate(ForkJoin%Contribution%Frequency(processCount), source = 0_IK)
289 269 : ForkJoin%Contribution%Frequency(1) = lenProcessID
290 977 : allocate(ForkJoin%Contribution%LogFrequency(processCount), source = NEGINF_RK)
291 269 : ForkJoin%Contribution%LogFrequency(1) = log(real(ForkJoin%Contribution%Frequency(1),kind=RK))
292 269 : ForkJoin%UniqueProcess%count = 1_IK
293 538 : ForkJoin%UniqueProcess%Identity = [1_IK]
294 538 : ForkJoin%UniqueProcess%Frequency = [lenProcessID]
295 269 : ForkJoin%SuccessProb%current = successProb
296 269 : ForkJoin%SuccessProb%effective = successProb
297 269 : return
298 : end if
299 :
300 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
301 : ! compute the effective successProb (MCMC efficiency) from the Process contributions
302 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
303 :
304 : call findUnique ( lenVector = lenProcessID &
305 : , Vector = ProcessID &
306 : , UniqueValue = ForkJoin%UniqueProcess%Identity &
307 : , UniqueCount = ForkJoin%UniqueProcess%Frequency &
308 : , lenUnique = ForkJoin%UniqueProcess%count &
309 215 : )
310 : !maxContributorProcessID = ForkJoin%UniqueProcess%Identity(maxloc(ForkJoin%UniqueProcess%Frequency))
311 :
312 : ! Sort ascending. This rearrangement may not be necessary anymore, since fitGeoCyclicLogPDF() is now order-agnostic.
313 : ! From the IO report perspective, however, it is important to have it ordered.
314 :
315 215 : allocate(Indx(ForkJoin%UniqueProcess%count))
316 215 : call indexArray( n = ForkJoin%UniqueProcess%count, Array = ForkJoin%UniqueProcess%Identity, Indx = Indx, Err = ForkJoin%Err )
317 215 : if (ForkJoin%Err%occurred) then
318 : ! LCOV_EXCL_START
319 : ForkJoin%Err%msg = PROCEDURE_NAME // ForkJoin%Err%msg
320 : return
321 : ! LCOV_EXCL_STOP
322 : end if
323 :
324 : ! get all processes contributions
325 :
326 1557 : ForkJoin%UniqueProcess%Identity(:) = ForkJoin%UniqueProcess%Identity(Indx)
327 1557 : ForkJoin%UniqueProcess%Frequency(:) = ForkJoin%UniqueProcess%Frequency(Indx)
328 :
329 215 : deallocate(Indx)
330 :
331 215 : ForkJoin%Contribution%count = processCount
332 215 : if (allocated(ForkJoin%Contribution%LogFrequency)) deallocate(ForkJoin%Contribution%LogFrequency);
333 890 : allocate(ForkJoin%Contribution%LogFrequency(ForkJoin%Contribution%count), source = 0._RK)
334 215 : if (allocated(ForkJoin%Contribution%Frequency)) deallocate(ForkJoin%Contribution%Frequency);
335 890 : allocate(ForkJoin%Contribution%Frequency(ForkJoin%Contribution%count), source = 0_IK)
336 215 : if (allocated(ForkJoin%Contribution%Identity)) deallocate(ForkJoin%Contribution%Identity);
337 215 : allocate(ForkJoin%Contribution%Identity(ForkJoin%Contribution%count))
338 :
339 1557 : ForkJoin%Contribution%Frequency(ForkJoin%UniqueProcess%Identity) = ForkJoin%UniqueProcess%Frequency
340 2455 : ForkJoin%Contribution%Identity = [(i, i = 1, ForkJoin%Contribution%count)]
341 215 : if (ForkJoin%Contribution%Frequency(1)==0_IK) then
342 : ! LCOV_EXCL_START
343 : ForkJoin%Err%occurred = .true.
344 : ForkJoin%Err%msg = PROCEDURE_NAME//": The contribution of the first process to the simulation is zero. This is highly unusual and requires further investigation."
345 : return
346 : ! LCOV_EXCL_STOP
347 : end if
348 890 : do i = 1, ForkJoin%Contribution%count
349 : ! TODO: xxx a better solution instead of this ad-hoc approach would be to fit for the GeoCyclicCDF. Amir.
350 890 : if (ForkJoin%Contribution%Frequency(i)/=0_IK) then
351 671 : ForkJoin%Contribution%LogFrequency(i) = log(real(ForkJoin%Contribution%Frequency(i),kind=RK))
352 : else
353 4 : ForkJoin%Contribution%LogFrequency(i) = ForkJoin%Contribution%LogFrequency(i-1) + LOG_HALF
354 : end if
355 : end do
356 :
357 215 : ForkJoin%SuccessProb%current = successProb
358 :
359 : ! fit for the effective successProb
360 :
361 : ForkJoin%SuccessProb%PowellMinimum = fitGeoCyclicLogPDF ( maxNumTrial = processCount & ! LCOV_EXCL_LINE
362 : , numTrial = ForkJoin%Contribution%count & ! LCOV_EXCL_LINE
363 : , SuccessStep = ForkJoin%Contribution%Identity & ! LCOV_EXCL_LINE
364 : , LogCount = ForkJoin%Contribution%LogFrequency & ! LCOV_EXCL_LINE
365 215 : )
366 215 : if (ForkJoin%SuccessProb%PowellMinimum%Err%occurred) then
367 : ! LCOV_EXCL_START
368 : ForkJoin%Err = ForkJoin%SuccessProb%PowellMinimum%Err
369 : ForkJoin%Err%msg = PROCEDURE_NAME // ForkJoin%Err%msg
370 : return
371 : ! LCOV_EXCL_STOP
372 : end if
373 :
374 215 : ForkJoin%SuccessProb%effective = ForkJoin%SuccessProb%PowellMinimum%xmin(1)
375 :
376 : ! fit for the effective successProb
377 :
378 : call getForkJoinSpeedup ( successProb = ForkJoin%SuccessProb%effective & ! max(mcmcSamplingEfficiency, ForkJoin%SuccessProb%PowellMinimum%xmin(1)) & ! avoid unstable estimates of the effective efficiency.
379 : , seqSecTime = epsilon(seqSecTime) & ! time cost of the sequential section of the code, which is negligible here.
380 : , parSecTime = parSecTime & ! the serial runtime for the parallel section of the code.
381 : , comSecTimePerProc = comSecTime / (processCount - 1) & ! the communication overhead for each additional image beyond the leader.
382 : , minMaxNumProc = 2 * processCount & ! speedup will be computed at least up to this process count, if not more.
383 : , Speedup = ForkJoin%Speedup%Scaling & ! returned Speedup.
384 : , lenSpeedup = ForkJoin%Speedup%count & ! length of the returned `Speedup%Scaling` vector.
385 : , maxSpeedup = ForkJoin%Speedup%Maximum%value & ! returned maximum speedup.
386 : , maxSpeedupNumProc = ForkJoin%Speedup%Maximum%nproc & ! returned number of processes for maximum speedup.
387 : , Err = ForkJoin%Err &
388 215 : )
389 215 : if (ForkJoin%Err%occurred) then
390 : ! LCOV_EXCL_START
391 : ForkJoin%Err%msg = PROCEDURE_NAME // ForkJoin%Err%msg
392 : return
393 : ! LCOV_EXCL_STOP
394 : end if
395 215 : ForkJoin%Speedup%current = ForkJoin%Speedup%Scaling(processCount)
396 :
397 923 : end function constructForkJoin
398 :
399 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
400 :
401 : !> \brief
402 : !> Predict the parallel simulation speedup for a range of possible processor counts.
403 : !>
404 : !> @param[in] successProb : The success probability (the effective acceptance rate per objective function call).
405 : !> @param[in] seqSecTime : The timing of the sequential sections of the code.
406 : !> @param[in] parSecTime : The timing of the parallel sections of the code.
407 : !> @param[in] comSecTimePerProc : The timing of the communication sections of the code per processor.
408 : !> @param[in] minMaxNumProc : The minimum number of processes for which the speedup will be computed. It must be at least 1.
409 : !> @param[out] Speedup : The vector of speedup values for different counts of processes.
410 : !> @param[out] lenSpeedup : The length of the vector `Speedup`.
411 : !> @param[out] maxSpeedupNumProc : The number of processes at which the maximum speedup happens.
412 : !> @param[out] maxSpeedup : The maximum speedup for any number of processors.
413 : !> @param[out] Err : An object of [Err_type](@ref err_mod::err_type) class indicating whether and error has occurred upon return.
414 430 : subroutine getForkJoinSpeedup(successProb, seqSecTime, parSecTime, comSecTimePerProc, minMaxNumProc, Speedup, lenSpeedup, maxSpeedupNumProc, maxSpeedup, Err)
415 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
416 : !DEC$ ATTRIBUTES DLLEXPORT :: getForkJoinSpeedup
417 : #endif
418 493 : use Statistics_mod, only: getLogProbGeoCyclic
419 : use Constants_mod, only: IK, RK
420 : use String_mod, only: num2str
421 : use Misc_mod, only: resize
422 :
423 : implicit none
424 :
425 : real(RK) , intent(in) :: successProb, seqSecTime, parSecTime, comSecTimePerProc
426 : integer(IK) , intent(in) :: minMaxNumProc ! must be at least 1
427 : real(RK) , intent(out), allocatable :: Speedup(:)
428 : integer(IK) , intent(out) :: lenSpeedup
429 : real(RK) , intent(out) :: maxSpeedup
430 : integer(IK) , intent(out) :: maxSpeedupNumProc
431 : type(Err_type) , intent(out), optional :: Err
432 :
433 : character(*) , parameter :: PROCEDURE_NAME = MODULE_NAME // "@constructForkJoin()"
434 : integer(IK) , parameter :: ABS_MAX_NUM_PROC = 1000000_IK
435 :
436 430 : real(RK) :: FirstImageContribution(1)
437 215 : real(RK) :: serialRuntime, parSecTimePerProc, comSecTime
438 : integer(IK) :: numProc, lenSpeedupNew !, maxNumProc
439 : logical :: maxSpeedupFound, isPresentErr
440 : integer(IK), parameter :: SuccessStep(*) = [1_IK]
441 :
442 215 : isPresentErr = present(Err)
443 215 : if (isPresentErr) Err%occurred = .false.
444 :
445 215 : lenSpeedup = minMaxNumProc
446 215 : allocate(Speedup(lenSpeedup))
447 :
448 215 : numProc = 1
449 215 : Speedup(numProc) = 1._RK
450 215 : maxSpeedup = Speedup(numProc)
451 215 : maxSpeedupNumProc = numProc
452 215 : serialRuntime = seqSecTime + parSecTime ! serial runtime of the code per function call
453 :
454 215 : maxSpeedupFound = .false.
455 1807 : loopOverProc: do
456 :
457 2022 : numProc = numProc + 1
458 :
459 : ! resize allocation if needed
460 :
461 2022 : if (numProc>lenSpeedup) then
462 233 : if (maxSpeedupFound) exit loopOverProc
463 18 : lenSpeedupNew = 2 * lenSpeedup
464 18 : call resize(Speedup, from = lenSpeedup, to = lenSpeedupNew)
465 18 : lenSpeedup = lenSpeedupNew
466 : end if
467 :
468 : ! compute the fraction of work done by the first image
469 :
470 1807 : if (successProb==0._RK) then
471 0 : FirstImageContribution(1) = 1._RK / numProc
472 : else
473 :
474 : FirstImageContribution = exp(getLogProbGeoCyclic( successProb = successProb &
475 : , maxNumTrial = numProc &
476 : , numTrial = 1_IK &
477 : , SuccessStep = SuccessStep &
478 3614 : ) )
479 : end if
480 :
481 : ! effective runtime of the parallel-section of the code, when executed in parallel on numProc processes
482 :
483 1807 : parSecTimePerProc = parSecTime * FirstImageContribution(1)
484 :
485 : ! communication time grows linearly with the number of processes.
486 :
487 1807 : comSecTime = (numProc-1_IK) * comSecTimePerProc
488 :
489 1807 : Speedup(numProc) = serialRuntime / (seqSecTime + parSecTimePerProc + comSecTime)
490 1807 : if (maxSpeedup < Speedup(numProc)) then
491 469 : maxSpeedup = Speedup(numProc)
492 469 : maxSpeedupNumProc = numProc
493 : else
494 1338 : maxSpeedupFound = .true.
495 : end if
496 :
497 : if (numProc<ABS_MAX_NUM_PROC) cycle loopOverProc ! LCOV_EXCL_LINE
498 :
499 : ! LCOV_EXCL_START
500 : if (isPresentErr) then
501 : Err%occurred = .true.
502 : Err%msg = PROCEDURE_NAME // &
503 : ": Failed to find the number of processes with which the maximum speedup occurs. The search continued up to " // &
504 : num2str(ABS_MAX_NUM_PROC) // " processes."
505 : end if
506 : return
507 : ! LCOV_EXCL_STOP
508 :
509 : end do loopOverProc
510 :
511 215 : end subroutine getForkJoinSpeedup
512 :
513 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
514 :
515 : end module Parallelism_mod ! LCOV_EXCL_LINE
|