Line data Source code
1 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3 : !!!! !!!!
4 : !!!! ParaMonte: Parallel Monte Carlo and Machine Learning Library. !!!!
5 : !!!! !!!!
6 : !!!! Copyright (C) 2012-present, The Computational Data Science Lab !!!!
7 : !!!! !!!!
8 : !!!! This file is part of the ParaMonte library. !!!!
9 : !!!! !!!!
10 : !!!! LICENSE !!!!
11 : !!!! !!!!
12 : !!!! https://github.com/cdslaborg/paramonte/blob/main/LICENSE.md !!!!
13 : !!!! !!!!
14 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
15 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
16 :
17 : #if CAF_ENABLED || MPI_ENABLED || OMP_ENABLED
18 : #define SET_PARALLEL(X)X
19 : #else
20 : #define SET_PARALLEL(X)
21 : #endif
22 : #if CFI_ENABLED
23 : #define SHAPE_ARG, ndim
24 : #else
25 : #define SHAPE_ARG
26 : #endif
27 : ! \warning
28 : ! The MPI library real kinds do not fully match all available Fortran kinds.
29 : ! As such, instead of using the MPI intrinsic explicit real kinds
30 : ! we pass real data as `mpi_byte` data type.
31 : ! This method, however, has a caveat when MPI communicates data between systems with different endianness.
32 : ! When data is transferred as bytes, the endianness is not automatically taken into account.
33 : ! As such, the transferred values may not be meaningful.
34 : ! For now, we accept this risk as the odds of mixed endianness on the existing contemporary supercomputers is low.
35 : ! This may, however, need a more robust solution in the future.
36 : ! One possible solution might be to build an appropriate real data type using the MPI intrinsic `mpi_type_create_struct()`.
37 : integer , parameter :: NBRK = c_sizeof(0._RKC) ! Number of bytes in the current real kind.
38 : #if OMP_ENABLED && (MATLAB_ENABLED || PYTHON_ENABLED || R_ENABLED)
39 : real(RKC) :: mold
40 : #elif OMP_ENABLED
41 : integer(IK) , parameter :: CACHELINE = 128_IK * 8_IK
42 : integer(IK) , parameter :: REAL_SIZE = storage_size(0._RKC)
43 : integer(IK) , parameter :: JUMP = 1_IK + CACHELINE / REAL_SIZE
44 : real(RKC) :: logFuncState(JUMP, spec%image%count)
45 : #endif
46 : character(*, SK) , parameter :: PROCEDURE_NAME = MODULE_NAME//SK_"@getErrKernelRun()"
47 : integer(IK) , parameter :: CHAIN_RESTART_OFFSET = 2_IK
48 : real(RKC) :: sumAccrAccRejDel ! sum(accr since start) for accepted/rejected/delayed proposals: used to figure out the average acceptance ratio for the entire chain.
49 : real(RKC) :: sumAccrAccRej ! sum(accr since start) for accepted/rejected proposals: used to figure out the average acceptance ratio for the entire chain.
50 : integer , allocatable :: pos(:)
51 : integer :: ndimp1
52 : integer(IK) :: ndim
53 : integer(IK) :: numFunCallAcceptedLastAdaptation ! number of function calls accepted at Last proposal adaptation occurrence
54 : integer(IK) :: counterAUP ! counter for proposalAdaptationPeriod
55 : integer(IK) :: counterAUC ! counter for proposalAdaptationCount
56 : integer(IK) :: counterDRS ! counter for Delayed Rejection Stages
57 : integer(IK) :: lastStateWeight ! This is used for passing the most recent verbose chain segment to the adaptive updater of the sampler
58 : integer(IK) :: currentStateWeight ! counter for SampleWeight, used only in restart mode
59 : integer(IK) :: numFunCallAcceptedPlusOne ! counter for SampleWeight, used only in restart mode
60 : real(RKC) :: meanAccRateSinceStart ! used for restart file read. xxx \todo: this could be merged with stat%cfc%meanAcceptanceRate.
61 : real(RKC) :: logFuncDiff ! The difference between the log of the old and the new states. Used to avoid underflow.
62 : integer(IK) :: dumint
63 : logical(LK) :: adaptationIsGreedy
64 : logical(LK) :: adaptationSucceeded
65 : integer(IK) :: adaptationMeasureLen
66 : integer(IK) :: acceptedRejectedDelayedUnusedRestartMode
67 : !real(RKC) :: adaptationMeasureDummy
68 : real(RKC) , allocatable :: adaptationMeasure(:)
69 : integer :: imageStartID, imageEndID, imageID ! This must be default integer, at least for MPI.
70 : #if OMP_ENABLED
71 : #define CO_LOGFUNCSTATE(I,J,K)co_logFuncState(I,J,K)
72 : #define CO_ACCR(I,J)co_accr(I,J)
73 : #define GET_ELL(X,I)X(I)
74 : #define SET_CONOMP(X)X
75 : integer :: co_proposalFound_samplerUpdateOccurred(2) ! merging these scalars would reduce the MPI communication overhead cost: co_proposalFound, co_samplerUpdateOccurred, co_counterDRS, 0 means false, 1 means true
76 : real(RKC) :: maxLogFuncRejectedProposal(spec%image%count) ! used in delayed rejection sampling.
77 : real(RKC) :: unifrnd(spec%image%count) ! used for random number generation.
78 : real(RKC) , allocatable :: co_logFuncState(:,:,:) ! (0 : ndim, 1 : njob, -1 : proposalDelayedRejectionCount), -1 is the current accepted state.
79 : real(RKC) , allocatable :: co_accr(:,:) ! (1 : njob, -1 : proposalDelayedRejectionCount)
80 : #else
81 : #define CO_LOGFUNCSTATE(I,J,K)co_logFuncState(I,K)
82 : #define CO_ACCR(I,J)co_accr(J)
83 : #define GET_ELL(X,I)X
84 : #define SET_CONOMP(X)
85 : real(RKC) :: maxLogFuncRejectedProposal ! used in delayed rejection sampling.
86 : real(RKC) :: unifrnd ! used for random number generation.
87 : #if CAF_ENABLED
88 : integer , save :: co_proposalFound_samplerUpdateOccurred(2)[*] ! merging these scalars would reduce the MPI communication overhead cost: co_proposalFound, co_samplerUpdateOccurred, co_counterDRS, 0 means false, 1 means true.
89 : real(RKC) , allocatable :: co_logFuncState(:,:)[:] ! (0 : ndim, -1 : proposalDelayedRejectionCount), -1 is the current accepted state.
90 : real(RKC) , allocatable :: co_accr(:)[:] ! (1 : njob, -1 : proposalDelayedRejectionCount)
91 : #else
92 : integer :: co_proposalFound_samplerUpdateOccurred(2) ! merging these scalars would reduce the MPI communication overhead cost: co_proposalFound, co_samplerUpdateOccurred, co_counterDRS, 0 means false, 1 means true
93 : real(RKC) , allocatable :: co_logFuncState(:,:) ! (0 : ndim, -1 : proposalDelayedRejectionCount), -1 is the current accepted state.
94 : real(RKC) , allocatable :: co_accr(:) ! (1 : njob, -1 : proposalDelayedRejectionCount)
95 : #endif
96 : #if CAF_ENABLED || MPI_ENABLED
97 : #if MPI_ENABLED
98 : integer :: proposalDelayedRejectionCountPlusTwo
99 : integer :: proposalFoundSinglChainModeReduced ! the reduced value by summing proposalFoundSinglChainModeReduced over all images.
100 : real(RKC) , allocatable :: accrMat(:,:) ! matrix of size (-1:spec%proposalDelayedRejectionCount%val,1:spec%image%count).
101 : integer :: ierrMPI
102 : #endif
103 : integer :: proposalFoundSinglChainMode ! used in singleChain delayed rejection. zero if the proposal is not accepted. 1 if the proposal is accepted.
104 : #endif
105 : #endif
106 : #if !(CAF_ENABLED || MPI_ENABLED || OMP_ENABLED)
107 : parameter(imageID = 1, imageStartID = 1, imageEndID = 1) ! needed even in the case of serial run to assign a proper value to stat%cfc%processID
108 : #endif
109 13 : stat%avgCommPerFunCall = 0._RKD ! Until the reporting time, this is in reality, sumCommTimePerFunCall. This is meaningful only in singleChain parallelism.
110 13 : stat%numFunCallAcceptedRejectedDelayedUnused = spec%image%count ! used only in singleChain parallelism, and relevant only on the first image.
111 13 : ndim = spec%ndim%val
112 : ndimp1 = int(ndim + 1)
113 : err%occurred = .false._LK
114 13 : err%msg = repeat(SK_" ", 255)
115 : acceptedRejectedDelayedUnusedRestartMode = 0_IK ! used to compute more accurate timings in the restart mode.
116 13 : stat%avgTimePerFunCall = 0._RKD
117 13 : if (allocated(co_accr)) deallocate(co_accr)
118 13 : if (allocated(co_logFuncState)) deallocate(co_logFuncState)
119 : #if CAF_ENABLED
120 : allocate(co_logFuncState(0 : ndim, -1 : spec%proposalDelayedRejectionCount%val)[*])
121 : allocate(co_accr(-1 : spec%proposalDelayedRejectionCount%val)[*]) ! the negative element will contain counterDRS.
122 : #else
123 26 : allocate(CO_LOGFUNCSTATE(0 : ndim, 1 : spec%image%count, -1 : spec%proposalDelayedRejectionCount%val))
124 13 : allocate(CO_ACCR(1 : spec%image%count, -1 : spec%proposalDelayedRejectionCount%val)) ! the negative element will contain counterDRS.
125 : #endif
126 13 : CO_ACCR(:, 0) = 1._RKC ! initial acceptance rate for the first zeroth DR stage.
127 13 : CO_ACCR(:, -1) = 0._RKC ! the real-valued counterDRS, indicating the initial delayed rejection stage at which the first point is sampled.
128 23 : CO_ACCR(:, 1 : spec%proposalDelayedRejectionCount%val) = 0._RKC ! indicates the very first proposal acceptance on image 1.
129 : #if MPI_ENABLED
130 : if (allocated(accrMat)) deallocate(accrMat)
131 : allocate(accrMat(-1 : spec%proposalDelayedRejectionCount%val, 1 : spec%image%count)) ! the negative element will contain counterDRS.
132 : accrMat = 0._RKC ! -huge(1._RKC) ! debug
133 : accrMat(0, 1 : spec%image%count) = 1._RKC ! initial acceptance rate for the first zeroth DR stage.
134 : proposalDelayedRejectionCountPlusTwo = (spec%proposalDelayedRejectionCount%val + 2) * NBRK
135 : !proposalDelayedRejectionCountPlusTwo = spec%proposalDelayedRejectionCount%val + 2
136 : #endif
137 13 : if (proposal%delRejEnabled) then
138 2 : stat%numFunCallAcceptedRejectedDelayed = 0_IK ! Markov Chain counter
139 0 : sumAccrAccRejDel = 0._RKC ! sum of acceptance rate
140 : end if
141 : ! adaptationMeasure = 0._RKC ! needed for the first output
142 13 : adaptationSucceeded = .true._LK ! needed to set up lastStateWeight and numFunCallAcceptedLastAdaptation for the first accepted proposal.
143 0 : sumAccrAccRej = 0._RKC ! sum of acceptance rate
144 : counterAUC = 0_IK ! counter for pproposalAdaptationCount.
145 : counterAUP = 0_IK ! counter for proposalAdaptationPeriod.
146 13 : stat%numFunCallAccepted = 0_IK ! Markov Chain acceptance counter.
147 13 : stat%numFunCallAcceptedRejected = 0_IK ! Markov Chain counter
148 : numFunCallAcceptedLastAdaptation = 0_IK
149 : lastStateWeight = -huge(lastStateWeight)
150 13 : meanAccRateSinceStart = 1._RKC ! needed for the first restart output in fresh run.
151 :
152 13 : adaptationMeasureLen = 100_IK
153 13 : blockNewDryRun: if (spec%run%is%new) then
154 13 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(isFailedChainResize(stat%cfc, ndim, spec%outputChainSize%val, err%msg)),SK_"Insufficient memory allocation space for the output chain file contents.")
155 : else blockNewDryRun
156 : ! Load the existing Chain file into stat%cfc components.
157 0 : err = getErrChainRead(stat%cfc, spec%chainFile%file, spec%outputChainFileFormat%val, pre = spec%outputChainSize%val, pos = pos)
158 : !err = getErrChainWrite(stat%cfc, spec%chainFile%file//".temp", spec%outputChainFileFormat%val, spec%chainFile%format%rows, 1_IK, stat%cfc%nsam, spec%proposalAdaptationPeriod%val)
159 0 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
160 0 : if (err%iswarned .and. spec%image%is%first) call spec%disp%warn%show(PROCEDURE_NAME//getFine(__FILE__, __LINE__)//SK_": "//err%msg)
161 0 : if (0 < stat%cfc%nsam) adaptationMeasureLen = maxval(stat%cfc%sampleWeight(1 : stat%cfc%nsam))
162 0 : spec%run%is%new = stat%cfc%nsam <= CHAIN_RESTART_OFFSET
163 0 : spec%run%is%dry = .not. spec%run%is%new
164 0 : call spec%image%sync()
165 : ! set up the chain file.
166 : ! This chain file rewrite is necessary because the last two chains rows must be deleted.
167 0 : blockLeaderChainSetup: if (spec%image%is%leader) then
168 : block
169 : !integer :: ibeg, iend
170 : integer(IK) :: irow, iweight, nrow
171 0 : nrow = merge(CHAIN_RESTART_OFFSET, stat%cfc%nsam, spec%run%is%dry)
172 0 : if (spec%outputChainFileFormat%isCompact) then
173 0 : do irow = 0, nrow - 1
174 0 : backspace(spec%chainFile%unit, iostat = err%stat, iomsg = err%msg)
175 0 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
176 : end do
177 0 : elseif (spec%outputChainFileFormat%isVerbose) then
178 0 : do irow = 0, nrow - 1
179 0 : do iweight = 1, stat%cfc%sampleWeight(stat%cfc%nsam - irow)
180 0 : backspace(spec%chainFile%unit, iostat = err%stat, iomsg = err%msg)
181 0 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
182 : end do
183 : end do
184 0 : elseif (spec%outputChainFileFormat%isBinary .and. 0_IK < stat%cfc%nsam) then
185 : !print *, pos(2:) - pos(:size(pos) - 1)
186 0 : read(spec%chainFile%unit, pos = pos(stat%cfc%nsam - nrow + 1), iostat = err%stat, iomsg = err%msg)
187 0 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
188 : ! Infer the record length, then jump two records backward.
189 : !inquire(spec%chainFile%unit, pos = iend, iostat = err%stat, iomsg = err%msg)
190 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
191 : !ibeg = iend
192 : !irow = 0_IK
193 : !do
194 : ! ibeg = ibeg - 1_IK
195 : ! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(ibeg < 1_IK),SK_"Failed to infer the record length in the output binary chain file """//spec%chainFile%file//SK_"""")
196 : ! read(spec%chainFile%unit, pos = ibeg, iostat = err%stat ) stat%cfc%processID (stat%cfc%nsam) &
197 : ! , stat%cfc%delayedRejectionStage(stat%cfc%nsam) &
198 : ! , stat%cfc%meanAcceptanceRate (stat%cfc%nsam) &
199 : ! , stat%cfc%adaptationMeasure (stat%cfc%nsam) &
200 : ! , stat%cfc%burninLocation (stat%cfc%nsam) &
201 : ! , stat%cfc%sampleWeight (stat%cfc%nsam) &
202 : ! , stat%cfc%sampleLogFunc (stat%cfc%nsam) &
203 : ! , stat%cfc%sampleState (:, stat%cfc%nsam)
204 : ! if (err%stat == 0) then
205 : ! irow = irow + 1_IK
206 : ! !print *, irow
207 : ! if (irow <= nrow) cycle
208 : ! read(spec%chainFile%unit, pos = ibeg, iostat = err%stat, iomsg = err%msg)
209 : ! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
210 : ! exit
211 : ! end if
212 : !end do
213 : !!
214 : !!if (0 < nrow) then
215 : !! inquire(spec%chainFile%unit, pos = ibeg, iostat = err%stat, iomsg = err%msg)
216 : !! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
217 : !! write(spec%chainFile%unit,iostat = err%stat, iomsg = err%msg) stat%cfc%processID (stat%cfc%nsam) &
218 : !! , stat%cfc%delayedRejectionStage(stat%cfc%nsam) &
219 : !! , stat%cfc%meanAcceptanceRate (stat%cfc%nsam) &
220 : !! , stat%cfc%adaptationMeasure (stat%cfc%nsam) &
221 : !! , stat%cfc%burninLocation (stat%cfc%nsam) &
222 : !! , stat%cfc%sampleWeight (stat%cfc%nsam) &
223 : !! , stat%cfc%sampleLogFunc (stat%cfc%nsam) &
224 : !! , stat%cfc%sampleState (:, stat%cfc%nsam)
225 : !! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
226 : !! inquire(spec%chainFile%unit, pos = iend, iostat = err%stat, iomsg = err%msg)
227 : !! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
228 : !! write(spec%chainFile%unit, pos = iend - (nrow + 1) * (iend - ibeg), iostat = err%stat, iomsg = err%msg)
229 : !! RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%stat /= 0),err%msg)
230 : !!end if
231 : end if
232 : !! Create a temporary copy of the chain file, just for the sake of not losing the simulation results if the write action fails at any stage.
233 : !character(:, SK), allocatable :: contents, tempfile
234 : !call setContentsFrom(spec%chainFile%file, contents, iostat = iostat, iomsg = err%msg)
235 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(iostat /= 0_IK),err%msg)
236 : !tempfile = spec%chainFile%file//SK_".restart"
237 : !call setContentsTo(tempfile, contents, iostat = iostat, iomsg = err%msg)
238 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(iostat /= 0_IK),err%msg)
239 : !! reopen the chain file to write contents minus the last two rows and resume the simulation.
240 : !err = getErrChainWrite(stat%cfc, spec%chainFile%file, spec%outputChainFileFormat%val, spec%chainFile%format%rows, 1_IK, stat%cfc%nsam - CHAIN_RESTART_OFFSET, spec%proposalAdaptationPeriod%val)
241 : !if (err%iswarned .and. spec%image%is%leader) call spec%disp%warn%show(PROCEDURE_NAME//getFine(__LINE__)//SK_": "//err%msg)
242 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
243 : !! remove the temporary copy of the chain file
244 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(isFailedRemove(tempfile, errmsg = err%msg)),err%msg)
245 : !deallocate(contents, tempfile)
246 : end block
247 : end if blockLeaderChainSetup
248 : !RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
249 : end if blockNewDryRun
250 :
251 13 : stat%timer = timer_type()
252 : !stat%timer%start = stat%timer%time()
253 :
254 13 : if (spec%image%is%leader) call setResized(adaptationMeasure, adaptationMeasureLen)
255 13 : if (spec%run%is%new) then ! this must be done separately from the above blockNewDryRun.
256 13 : stat%cfc%burninLocation(1) = 1_IK
257 40 : CO_LOGFUNCSTATE(1 : ndim, 1, 0) = spec%proposalStart%val ! proposal state.
258 : #if OMP_ENABLED && (MATLAB_ENABLED || PYTHON_ENABLED || R_ENABLED)
259 : co_logFuncState(1 : ndim, 2 : spec%image%count, 0) = spread(co_logFuncState(1 : ndim, 1, 0), 2, spec%image%count - 1) ! This defines all input states to the user-defined subroutine.
260 : block
261 : real(RKD) :: avgTimePerFunCall, avgCommPerFunCall
262 : !avgTimePerFunCall = epsilon(0._RKD); avgCommPerFunCall = epsilon(0._RKD)
263 : mold = getLogFunc(co_logFuncState(:, 1 : spec%image%count, 0), avgTimePerFunCall, avgCommPerFunCall)
264 : stat%avgTimePerFunCall = stat%avgTimePerFunCall + max(avgTimePerFunCall, epsilon(0._RKD))
265 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + max(avgCommPerFunCall, epsilon(0._RKD))
266 : end block
267 : #else
268 13 : stat%timer%clock = stat%timer%time()
269 13 : CO_LOGFUNCSTATE(0, 1, 0) = getLogFunc(CO_LOGFUNCSTATE(1 : ndim, 1, 0)) ! proposal logFunc
270 13 : stat%avgTimePerFunCall = stat%avgTimePerFunCall + stat%timer%time() - stat%timer%clock
271 : SET_OMP(co_logFuncState(:, 2 : spec%image%count, 0) = spread(co_logFuncState(:, 1, 0), 2, spec%image%count - 1))
272 : #endif
273 : else
274 0 : CO_LOGFUNCSTATE(1 : ndim, 1, 0) = stat%cfc%sampleState(1 : ndim, 1) ! proposal state
275 0 : CO_LOGFUNCSTATE(0, 1, 0) = stat%cfc%sampleLogFunc(1) ! proposal logFunc
276 : SET_CONOMP(co_logFuncState(:, 2 : spec%image%count, 0) = spread(co_logFuncState(:, 1, 0), 2, spec%image%count - 1))
277 : end if
278 53 : CO_LOGFUNCSTATE(0 : ndim, :, -1) = CO_LOGFUNCSTATE(0 : ndim, :, 0) ! set current logFunc and State equal to the first proposal.
279 13 : stat%chain%mode%val = -huge(stat%chain%mode%val)
280 13 : stat%chain%mode%loc = 0_IK
281 : SET_PARALLEL(if (spec%parallelism%is%singleChain) then)
282 : SET_PARALLEL(imageEndID = spec%image%count)
283 : SET_PARALLEL(imageStartID = 1)
284 : SET_PARALLEL(else) ! is%multiChain ! This never happens in the current OpenMP implementation.
285 : #if OMP_ENABLED
286 : error stop "Internal error occurred: multiChain parallelism is unsupported in threaded or concurrent simulations."
287 : #endif
288 : SET_PARALLEL(imageStartID = spec%image%id)
289 : SET_PARALLEL(imageEndID = spec%image%id)
290 : SET_PARALLEL(end if)
291 :
292 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% start of loopMarkovChain %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
293 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
294 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
295 :
296 13 : call initProgressReport(spec)
297 :
298 : loopMarkovChain: do
299 :
300 0 : co_proposalFound_samplerUpdateOccurred(2) = 0 ! at each iteration assume no samplerUpdateOccurred, unless it occurs.
301 : SET_CAFMPI(blockLeaderImage: if (spec%image%is%leader) then)
302 :
303 : co_proposalFound_samplerUpdateOccurred(1) = 0 ! co_proposalFound = .false._LK
304 831348 : adaptationIsGreedy = counterAUC < spec%proposalAdaptationCountGreedy%val
305 :
306 : SET_PARALLEL(imageID = imageStartID)
307 : SET_PARALLEL(loopOverImages: do)
308 :
309 : SET_PARALLEL(if (imageEndID < imageID) exit loopOverImages)
310 : SET_CAF(stat%timer%clock = stat%timer%time())
311 : SET_CAF(if (imageID /= spec%image%id) co_accr(-1 : spec%proposalDelayedRejectionCount%val) = co_accr(-1 : spec%proposalDelayedRejectionCount%val)[imageID]) ! happens only in is%singleChain holds.
312 : SET_CAF(stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock)
313 : SET_MPI(if (imageID /= spec%image%id) co_accr(-1 : spec%proposalDelayedRejectionCount%val) = accrMat(-1 : spec%proposalDelayedRejectionCount%val, imageID)) ! happens only in is%singleChain holds.
314 : SET_CONOMP(if (imageID <= spec%image%count) co_accr(1, -1 : spec%proposalDelayedRejectionCount%val) = co_accr(imageID, -1 : spec%proposalDelayedRejectionCount%val))
315 : ! This workaround is essential for efficient Coarray/MPI communications, but redundant for serial, openmp, or concurrent applications.
316 : ! But who cares using a few more CPU cycles for serial mode in exchange for saving thousands in parallel?
317 831348 : counterDRS = nint(CO_ACCR(1, -1), IK)
318 831348 : if (-1_IK < counterDRS) co_proposalFound_samplerUpdateOccurred(1) = 1 ! co_proposalFound = .true._LK
319 :
320 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% blockProposalAccepted %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
321 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
322 :
323 : ! On the very first iteration, this block is (and must be) executed for imageID == 1
324 : ! since it is for the first (starting) point, which is assumed to have been accepted
325 : ! as the first point by the first coarray imageID.
326 :
327 831348 : blockProposalAccepted: if (co_proposalFound_samplerUpdateOccurred(1) == 1) then ! co_proposalAccepted = .true._LK
328 :
329 : currentStateWeight = 0_IK
330 :
331 : ! communicate the accepted logFunc and State from the winning image to the leader/all images: co_logFuncState.
332 : #if CAF_ENABLED
333 : if (imageID /= spec%image%id) then ! Avoid remote connection for something that is available locally.
334 : stat%timer%clock = stat%timer%time()
335 : co_logFuncState(0 : ndim, -1) = co_logFuncState(0 : ndim, -1)[imageID]
336 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
337 : end if
338 : #elif MPI_ENABLED
339 : if (spec%parallelism%is%singleChain) then
340 : stat%timer%clock = stat%timer%time()
341 : ! broadcast winning image to all processes: buffer, count, datatype, root (broadcasting rank), comm, ierr
342 : call mpi_bcast(imageID, 1, mpi_integer, 0, mpi_comm_world, ierrMPI)
343 : ! broadcast co_logFuncState from the winning image to all others: buffer, count, datatype, root (broadcasting rank), comm, ierr
344 : call mpi_bcast(co_logFuncState(0 : ndim, -1), ndimp1 * NBRK, mpi_byte, imageID - 1, mpi_comm_world, ierrMPI)
345 : !call mpi_bcast(co_logFuncState(0 : ndim, -1), ndimp1, mpi_double_precision, imageID - 1, mpi_comm_world, ierrMPI)
346 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
347 : end if
348 : #endif
349 : ! Note: after every adaptive update of the sampler, counterAUP is reset to 0.
350 285050 : if (counterAUP == 0_IK .and. adaptationSucceeded) then
351 20430 : numFunCallAcceptedLastAdaptation = numFunCallAcceptedLastAdaptation + 1_IK
352 : lastStateWeight = 0_IK
353 : end if
354 :
355 285050 : blockFreshDryRun: if (spec%run%is%new) then
356 285050 : call writeCFC(spec, stat, adaptationMeasure)
357 285050 : stat%numFunCallAccepted = stat%numFunCallAccepted + 1_IK
358 285050 : stat%cfc%processID(stat%numFunCallAccepted) = imageID
359 285050 : stat%cfc%delayedRejectionStage(stat%numFunCallAccepted) = counterDRS
360 285050 : stat%cfc%adaptationMeasure(stat%numFunCallAccepted) = 0._RKC
361 285050 : stat%cfc%sampleWeight(stat%numFunCallAccepted) = 0_IK
362 285050 : stat%cfc%sampleLogFunc(stat%numFunCallAccepted) = CO_LOGFUNCSTATE(0, imageID, -1)
363 1300150 : stat%cfc%sampleState(1 : ndim, stat%numFunCallAccepted) = CO_LOGFUNCSTATE(1 : ndim, imageID, -1)
364 : ! Find the burnin point.
365 285050 : stat%cfc%burninLocation(stat%numFunCallAccepted) = getBurninLoc(stat%numFunCallAccepted, stat%chain%mode%val, stat%cfc%sampleLogFunc(1 : stat%numFunCallAccepted))
366 : else blockFreshDryRun ! in restart mode: determine the correct value of co_proposalFound_samplerUpdateOccurred(1).
367 0 : numFunCallAcceptedPlusOne = stat%numFunCallAccepted + 1_IK
368 0 : if (numFunCallAcceptedPlusOne == stat%cfc%nsam) then
369 0 : spec%run%is%new = .true._LK
370 0 : call writeCFC(spec, stat, adaptationMeasure)
371 0 : spec%run%is%dry = .not. spec%run%is%new
372 0 : stat%cfc%sampleWeight(numFunCallAcceptedPlusOne) = 0_IK
373 0 : sumAccrAccRej = stat%cfc%meanAcceptanceRate(stat%numFunCallAccepted) * real(stat%numFunCallAcceptedRejected, RKC)
374 0 : if (proposal%delRejEnabled) sumAccrAccRejDel = stat%cfc%meanAcceptanceRate(stat%numFunCallAccepted) * real(stat%numFunCallAcceptedRejectedDelayed, RKC)
375 : end if
376 0 : stat%numFunCallAccepted = numFunCallAcceptedPlusOne
377 0 : numFunCallAcceptedPlusOne = stat%numFunCallAccepted + 1_IK
378 : end if blockFreshDryRun
379 285050 : if (stat%chain%mode%val < stat%cfc%sampleLogFunc(stat%numFunCallAccepted)) then
380 194 : stat%chain%mode%val = stat%cfc%sampleLogFunc(stat%numFunCallAccepted)
381 194 : stat%chain%mode%loc = stat%numFunCallAccepted
382 : end if
383 285050 : sumAccrAccRej = sumAccrAccRej + CO_ACCR(imageID, counterDRS)
384 :
385 : else blockProposalAccepted
386 :
387 546298 : counterDRS = spec%proposalDelayedRejectionCount%val
388 546298 : sumAccrAccRej = sumAccrAccRej + CO_ACCR(1, counterDRS)
389 :
390 : end if blockProposalAccepted
391 :
392 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
393 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% blockProposalAccepted %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
394 :
395 831348 : counterAUP = counterAUP + 1_IK
396 831348 : stat%progress%counterPRP = stat%progress%counterPRP + 1_IK
397 : ! UPDATE: The following comment is irrelevant and history as of Nov 2023.
398 : ! In fact, it is now required to update `stat%numFunCallAcceptedRejected` before calling `reportProgress()`
399 : ! because `numFunCallAcceptedRejected` on the sampler status report display will be always off by one.
400 : ! OLD COMMENTS: It is critical for this if block to occur before updating `stat%numFunCallAcceptedRejected` otherwise,
401 : ! `numFunCallAcceptedRejectedLastReport` will be updated to `stat%numFunCallAcceptedRejected`
402 : ! which can lead to "Floating-point exception - erroneous arithmetic operation" in the computation
403 : ! of `inverseoutputReportPeriod` when `blockLastSample` happens to be activated.
404 831348 : stat%numFunCallAcceptedRejected = stat%numFunCallAcceptedRejected + 1_IK
405 831348 : if (stat%progress%counterPRP == spec%outputReportPeriod%val) call reportProgress(spec, stat)
406 831348 : currentStateWeight = currentStateWeight + 1_IK
407 831348 : if (proposal%delRejEnabled) then
408 0 : sumAccrAccRejDel = sumAccrAccRejDel + sum(CO_ACCR(1, 0 : counterDRS))
409 66667 : stat%numFunCallAcceptedRejectedDelayed = stat%numFunCallAcceptedRejectedDelayed + counterDRS + 1_IK
410 : end if
411 831348 : if (spec%run%is%new) then ! these are used for adaptive proposal updating, so they have to be set on every accepted or rejected iteration (excluding delayed rejections)
412 831348 : stat%cfc%meanAcceptanceRate(stat%numFunCallAccepted) = sumAccrAccRej / real(stat%numFunCallAcceptedRejected,kind=RKC)
413 831348 : stat%cfc%sampleWeight(stat%numFunCallAccepted) = stat%cfc%sampleWeight(stat%numFunCallAccepted) + 1_IK
414 831348 : if (adaptationMeasureLen < stat%cfc%sampleWeight(stat%numFunCallAccepted)) then
415 6 : adaptationMeasureLen = 2_IK * adaptationMeasureLen
416 6 : call setResized(adaptationMeasure, adaptationMeasureLen)
417 : end if
418 : else
419 : SET_CAFMPI(acceptedRejectedDelayedUnusedRestartMode = stat%numFunCallAcceptedRejectedDelayedUnused)
420 : SET_CONOMP(acceptedRejectedDelayedUnusedRestartMode = stat%numFunCallAcceptedRejectedDelayedUnused)
421 0 : SET_SERIAL(acceptedRejectedDelayedUnusedRestartMode = stat%numFunCallAcceptedRejectedDelayed)
422 : end if
423 :
424 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% last output write %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
425 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
426 :
427 : ! in paradise mode, it is imperative to finish the simulation before any further redundant sampler updates occurs.
428 : ! This is the reason why blockLastSample appears before blockSamplerAdaptation.
429 :
430 831348 : blockLastSample: if (stat%numFunCallAccepted == spec%outputChainSize%val) then !co_missionAccomplished = .true._LK
431 : ! on 3 images Windows, substituting co_missionAccomplished with the following leads to 10% less communication overhead for 1D Gaussian example
432 : ! on 3 images Linux , substituting co_missionAccomplished with the following leads to 16% less communication overhead for 1D Gaussian example
433 : ! on 5 images Linux , substituting co_missionAccomplished with the following leads to 11% less communication overhead for 1D Gaussian example
434 : co_proposalFound_samplerUpdateOccurred(1) = -1 ! equivalent to co_missionAccomplished = .true._LK
435 13 : if (spec%run%is%new) then
436 13 : call writeCFC(spec, stat, adaptationMeasure)
437 13 : flush(spec%chainFile%unit)
438 : end if
439 13 : call reportProgress(spec, stat, timeLeft = 0._RKD) ! timeElapsed = stat%timer%time() - stat%timer%start, timeLeft = 0._RKC
440 : SET_PARALLEL(exit loopOverImages)
441 : end if blockLastSample
442 :
443 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
444 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% last output write %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
445 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
446 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% proposal adaptationMeasure %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
447 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
448 :
449 831348 : blockSamplerAdaptation: if (counterAUC < spec%proposalAdaptationCount%val .and. counterAUP == spec%proposalAdaptationPeriod%val) then
450 0 : co_proposalFound_samplerUpdateOccurred(2) = 1 ! istart = numFunCallAcceptedLastAdaptation ! = max( numFunCallAcceptedLastAdaptation , stat%cfc%burninLocation(stat%numFunCallAccepted) ) ! this is experimental
451 : ! the order in the following two MUST be preserved because occasionally stat%numFunCallAccepted = numFunCallAcceptedLastAdaptation
452 60932 : dumint = stat%cfc%sampleWeight(stat%numFunCallAccepted) ! needed for the restart mode, not needed in the fresh run
453 60932 : if (stat%numFunCallAccepted == numFunCallAcceptedLastAdaptation) then ! no new point has been accepted since last time
454 1644 : stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation) = currentStateWeight - lastStateWeight
455 8220 : CHECK_ASSERTION(__LINE__, mod(stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation), spec%proposalAdaptationPeriod%val) == 0_IK, PROCEDURE_NAME//SK_": Internal error occurred: "//getStr([spec%proposalAdaptationPeriod%val, stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation), currentStateWeight, lastStateWeight]))
456 : else
457 59288 : stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation) = stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation) - lastStateWeight
458 59288 : stat%cfc%sampleWeight(stat%numFunCallAccepted) = currentStateWeight ! needed for the restart mode, not needed in the fresh run
459 : end if
460 60932 : meanAccRateSinceStart = stat%cfc%meanAcceptanceRate(stat%numFunCallAccepted) ! used only in fresh runs, but is not worth putting it in a conditional fresh run block.
461 :
462 : call setProposalAdapted ( spec, proposal &
463 : , sampleState = stat%cfc%sampleState(:, numFunCallAcceptedLastAdaptation : stat%numFunCallAccepted) &
464 : , sampleWeight = stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation : stat%numFunCallAccepted) &
465 : , adaptationIsGreedy = adaptationIsGreedy &
466 : , meanAccRateSinceStart = meanAccRateSinceStart &
467 : , adaptationSucceeded = adaptationSucceeded &
468 : , adaptationMeasure = adaptationMeasure(dumint) &
469 : , err = err &
470 60932 : )
471 60932 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
472 60932 : if (spec%run%is%dry) sumAccrAccRej = meanAccRateSinceStart * stat%numFunCallAcceptedRejected
473 60932 : stat%cfc%sampleWeight(stat%numFunCallAccepted) = dumint ! needed for the restart mode, not needed in the fresh run, but is not worth fencing it.
474 60932 : if (stat%numFunCallAccepted == numFunCallAcceptedLastAdaptation) then
475 : !adaptationMeasure = adaptationMeasure + adaptationMeasureDummy ! this is the worst-case upper-bound
476 1644 : stat%cfc%adaptationMeasure(stat%numFunCallAccepted) = min(1._RKC, stat%cfc%adaptationMeasure(stat%numFunCallAccepted) + adaptationMeasure(dumint)) ! this is the worst-case upper-bound
477 : else
478 : !adaptationMeasure = adaptationMeasureDummy
479 59288 : stat%cfc%adaptationMeasure(stat%numFunCallAccepted) = adaptationMeasure(dumint)
480 59288 : stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation) = stat%cfc%sampleWeight(numFunCallAcceptedLastAdaptation) + lastStateWeight
481 : end if
482 60932 : if (adaptationSucceeded) then
483 : lastStateWeight = currentStateWeight ! stat%cfc%sampleWeight(stat%numFunCallAccepted) ! informative, do not remove
484 50825 : numFunCallAcceptedLastAdaptation = stat%numFunCallAccepted
485 : end if
486 : counterAUP = 0_IK
487 60932 : counterAUC = counterAUC + 1_IK
488 : !if (counterAUC==spec%proposalAdaptationCount%val) adaptationMeasure = 0._RKC
489 : else blockSamplerAdaptation
490 770416 : adaptationMeasure(stat%cfc%sampleWeight(stat%numFunCallAccepted)) = 0._RKC
491 : end if blockSamplerAdaptation
492 :
493 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
494 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% proposal adaptationMeasure %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
495 :
496 : SET_PARALLEL(if (co_proposalFound_samplerUpdateOccurred(1) == 1) exit loopOverImages)
497 : SET_PARALLEL(imageID = imageID + 1)
498 :
499 : SET_PARALLEL(end do loopOverImages)
500 :
501 : SET_CAF(if (spec%parallelism%is%singleChain) then)
502 : SET_CAF(if (co_proposalFound_samplerUpdateOccurred(2) == 1) call bcastProposalAdaptation(spec, proposal))
503 : SET_CAF(stat%timer%clock = stat%timer%time())
504 : SET_CAF(sync images(*))
505 : SET_CAF(stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock)
506 : SET_CAF(end if)
507 : SET_MPI(if (spec%parallelism%is%singleChain .and. co_proposalFound_samplerUpdateOccurred(1) == 0) then)
508 : ! broadcast rank #0 to all processes indicating unsuccessful sampling.
509 : SET_MPI(imageID = 0)
510 : ! buffer; count; datatype; root: broadcasting rank; comm; ierr
511 : SET_MPI(call mpi_bcast(imageID, 1, mpi_integer, 0, mpi_comm_world, ierrMPI))
512 : SET_MPI(end if)
513 :
514 : SET_CAFMPI(else blockLeaderImage) ! ATTN: This block should be executed only when singleChain parallelism is requested
515 :
516 : #if CAF_ENABLED
517 : sync images(1)
518 : ! get the accepted proposal from the first image
519 : stat%timer%clock = stat%timer%time()
520 : co_proposalFound_samplerUpdateOccurred(1:2) = co_proposalFound_samplerUpdateOccurred(1:2)[1]
521 : if (co_proposalFound_samplerUpdateOccurred(1) == 1) co_logFuncState(0 : ndim, -1) = co_logFuncState(0 : ndim, -1)[1]
522 : if (co_proposalFound_samplerUpdateOccurred(2) == 1) call bcastProposalAdaptation(spec, proposal)
523 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
524 : if (spec%run%is%dry) then
525 : if (co_proposalFound_samplerUpdateOccurred(1) == 1) then
526 : stat%numFunCallAccepted = stat%numFunCallAccepted + 1_IK
527 : numFunCallAcceptedPlusOne = stat%numFunCallAccepted + 1_IK
528 : currentStateWeight = 1_IK
529 : if (stat%numFunCallAccepted == stat%cfc%nsam) then
530 : spec%run%is%dry = .false._LK
531 : spec%run%is%new = .true._LK
532 : end if
533 : else
534 : currentStateWeight = currentStateWeight + spec%image%count
535 : end if
536 : else
537 : SET_DEBUG(stat%numFunCallAccepted = stat%numFunCallAccepted + co_proposalFound_samplerUpdateOccurred(1))
538 : end if
539 : #elif MPI_ENABLED
540 : ! fetch the winning rank from the main process
541 : stat%timer%clock = stat%timer%time()
542 : ! buffer, count, datatype, root: broadcasting rank, comm, ierr
543 : call mpi_bcast(imageID, 1, mpi_integer, 0, mpi_comm_world, ierrMPI)
544 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
545 : if (0 < imageID) then ! co_proposalFound = .true._LK, sampling successful
546 : ! broadcast co_logFuncState from the winning image to all others
547 : stat%timer%clock = stat%timer%time()
548 : ! buffer, count, datatype, root: broadcasting rank, comm, ierr
549 : !call mpi_bcast(co_logFuncState(0 : ndim, -1), ndimp1, mpi_double_precision, imageID - 1, mpi_comm_world, ierrMPI)
550 : call mpi_bcast(co_logFuncState(0 : ndim, -1), ndimp1 * NBRK, mpi_byte, imageID - 1, mpi_comm_world, ierrMPI)
551 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
552 : end if
553 : if (spec%run%is%dry) then
554 : if (0 < imageID) then ! equivalent to co_proposalFound_samplerUpdateOccurred(1) == 1_IK
555 : stat%numFunCallAccepted = stat%numFunCallAccepted + 1_IK
556 : numFunCallAcceptedPlusOne = stat%numFunCallAccepted + 1_IK
557 : currentStateWeight = 1_IK
558 : if (stat%numFunCallAccepted == stat%cfc%nsam) then
559 : spec%run%is%new = .true._LK
560 : spec%run%is%dry = .false._LK
561 : end if
562 : else
563 : currentStateWeight = currentStateWeight + spec%image%count
564 : end if
565 : else
566 : SET_DEBUG(stat%numFunCallAccepted = stat%numFunCallAccepted + co_proposalFound_samplerUpdateOccurred(1))
567 : end if
568 : #endif
569 : SET_CAFMPI(end if blockLeaderImage)
570 :
571 831348 : RETURN_IF_FAILED(__LINE__,FAILED_IMAGE(err%occurred),err%msg)
572 :
573 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
574 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% begin Common block between all images %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
575 :
576 : #if MPI_ENABLED
577 : ! broadcast samplerUpdateOccurred from the root process to all others, and broadcast proposal adaptations if needed.
578 : if (spec%parallelism%is%singleChain) then
579 : stat%timer%clock = stat%timer%time()
580 : ! buffer: XXX: first element of co_proposalFound_samplerUpdateOccurred not needed except to end the simulation.
581 : ! This could perhaps be enhanced in the further to only pass one elemtn.
582 : ! buffer, count, datatype, root: broadcasting rank, comm, ierr
583 : call mpi_bcast(co_proposalFound_samplerUpdateOccurred, 2, mpi_integer, 0, mpi_comm_world, ierrMPI)
584 : if (co_proposalFound_samplerUpdateOccurred(2) == 1) call bcastProposalAdaptation(spec, proposal)
585 : stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock
586 : end if
587 : #endif
588 831348 : if (co_proposalFound_samplerUpdateOccurred(1) == -1) exit loopMarkovChain ! we are done: co_missionAccomplished = .true._LK
589 831335 : CO_ACCR(:, -1) = -1._RKC ! counterDRS at which new proposal is accepted. This null initialization is essential for all serial and parallel modes.
590 831335 : maxLogFuncRejectedProposal = -huge(maxLogFuncRejectedProposal) * .1_RKC
591 :
592 : ! This CAFMPI_SINGLCHAIN_ENABLED preprocessing avoids unnecessary communication in serial or multichain-parallel modes.
593 :
594 : #if CAF_ENABLED || MPI_ENABLED
595 : blockSingleChainParallelism: if (spec%parallelism%is%singleChain) then
596 : proposalFoundSinglChainMode = 0
597 : ! This CAF syncing is necessary since the co_logFuncState has to be fetched from the
598 : ! first image by all other images before it is updated again below by the first image.
599 : SET_CAF(if (spec%image%is%leader) then)
600 : SET_CAF(stat%timer%clock = stat%timer%time())
601 : SET_CAF(sync images(*))
602 : SET_CAF(stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock)
603 : SET_CAF(else)
604 : SET_CAF(sync images(1))
605 : SET_CAF(end if)
606 : #define CAFMPI_SINGLCHAIN_ENABLED 1
607 : #include "pm_sampling_kernel.inc.inc.F90"
608 : #undef CAFMPI_SINGLCHAIN_ENABLED
609 : ! gather all accr on the leader image.
610 : SET_MPI(stat%timer%clock = stat%timer%time())
611 : ! send buffer, send count, send datatype, recv buffer, recv count, recv datatype, root rank, comm, mpi error flag
612 : !SET_MPI(call mpi_gather(co_accr, proposalDelayedRejectionCountPlusTwo, mpi_double_precision, accrMat(:,:), proposalDelayedRejectionCountPlusTwo, mpi_double_precision, 0, mpi_comm_world, ierrMPI))
613 : SET_MPI(call mpi_gather(co_accr, proposalDelayedRejectionCountPlusTwo, mpi_byte, accrMat(:,:), proposalDelayedRejectionCountPlusTwo, mpi_byte, 0, mpi_comm_world, ierrMPI))
614 : SET_MPI(stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock)
615 : SET_CAF(if (spec%image%is%leader) then)
616 : SET_CAF(stat%timer%clock = stat%timer%time())
617 : SET_CAF(sync images(*))
618 : SET_CAF(stat%avgCommPerFunCall = stat%avgCommPerFunCall + stat%timer%time() - stat%timer%clock)
619 : SET_CAF(else)
620 : SET_CAF(sync images(1))
621 : SET_CAF(end if)
622 : else blockSingleChainParallelism ! multichain.
623 : block
624 : #include "pm_sampling_kernel.inc.inc.F90"
625 : end block
626 : end if blockSingleChainParallelism
627 : #elif OMP_ENABLED
628 : #include "pm_sampling_kernel.inc.inc.F90"
629 : #else
630 : ! serial.
631 : #include "pm_sampling_kernel.inc.inc.F90"
632 : #endif
633 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% end Common block between all images %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
634 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
635 :
636 : end do loopMarkovChain
637 :
638 : #if (MPI_ENABLED || CAF_ENABLED) && (CODECOV_ENABLED || SAMPLER_TEST_ENABLED)
639 : block; use pm_err, only: isFailedImage; call isFailedImage(err%occurred); end block
640 : #endif
641 : if (err%occurred) return
642 :
643 13 : stat%timer%clock = stat%timer%time() - stat%timer%start ! This one last timing is crucial for postprocessing report.
644 :
645 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
646 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
647 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%* end of loopMarkovChain %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
648 :
649 13 : stat%cfc%nsam = stat%numFunCallAccepted
650 13 : stat%cfc%sumw = stat%numFunCallAcceptedRejected
651 :
652 13 : if (.not. proposal%delRejEnabled) then
653 11 : stat%numFunCallAcceptedRejectedDelayed = stat%numFunCallAcceptedRejected
654 0 : sumAccrAccRejDel = sumAccrAccRej
655 : end if
656 :
657 13 : if (spec%image%is%leader) then
658 : block
659 : integer(IK) :: i
660 13 : if (0.999999_RKC < spec%burninAdaptationMeasure%val) then
661 13 : stat%burninLocDRAM%compact = 1_IK
662 13 : stat%burninLocDRAM%verbose = 1_IK
663 : else
664 0 : stat%burninLocDRAM%compact = stat%numFunCallAccepted
665 0 : stat%burninLocDRAM%verbose = stat%numFunCallAcceptedRejected - stat%cfc%sampleWeight(stat%numFunCallAccepted) + 1_IK
666 0 : loopAdaptationBurnin: do i = stat%numFunCallAccepted - 1, 1, -1
667 0 : if (spec%burninAdaptationMeasure%val < stat%cfc%adaptationMeasure(i)) exit loopAdaptationBurnin
668 0 : stat%burninLocDRAM%compact = stat%burninLocDRAM%compact - 1_IK
669 0 : stat%burninLocDRAM%verbose = stat%burninLocDRAM%verbose - stat%cfc%sampleWeight(i)
670 : end do loopAdaptationBurnin
671 : end if
672 : end block
673 13 : stat%burninLocMCMC%compact = stat%cfc%burninLocation(stat%numFunCallAccepted)
674 45 : stat%burninLocMCMC%verbose = sum(stat%cfc%sampleWeight(1 : stat%burninLocMCMC%compact - 1)) + 1_IK
675 53 : stat%chain%mode%crd = stat%cfc%sampleState(1 : ndim, stat%chain%mode%loc)
676 : !stat%chain%mode%Loc%verbose = sum(stat%cfc%sampleWeight(1:stat%chain%mode%loc-1)) + 1_IK
677 13 : close(spec%restartFile%unit)
678 13 : close(spec%chainFile%unit)
679 : endif
680 :
681 : ! LCOV_EXCL_START
682 : ! multichain parallelism should never happen for serial, concurrent, or openmp applications.
683 : SET_PARALLEL(if (spec%parallelism%is%multiChain) then)
684 : stat%avgCommPerFunCall = 0._RKC
685 : stat%numFunCallAcceptedRejectedDelayedUnused = stat%numFunCallAcceptedRejectedDelayed
686 : dumint = stat%numFunCallAcceptedRejectedDelayedUnused - acceptedRejectedDelayedUnusedRestartMode ! this is needed to avoid division-by-zero undefined behavior
687 : if (dumint /= 0_IK) stat%avgTimePerFunCall = stat%avgTimePerFunCall / dumint
688 : SET_PARALLEL(elseif(spec%image%is%first) then)
689 : SET_PARALLEL(stat%avgCommPerFunCall = stat%avgCommPerFunCall / stat%numFunCallAcceptedRejectedDelayed)
690 : SET_PARALLEL(dumint = stat%numFunCallAcceptedRejectedDelayedUnused - acceptedRejectedDelayedUnusedRestartMode) ! this is needed to avoid division-by-zero undefined behavior
691 : SET_PARALLEL(if (dumint /= 0_IK) stat%avgTimePerFunCall = (stat%avgTimePerFunCall / dumint) * spec%image%count)
692 : SET_PARALLEL(end if)
693 : ! LCOV_EXCL_STOP
694 : #undef CO_LOGFUNCSTATE
695 : #undef SET_PARALLEL
696 : #undef SET_CONOMP
697 : #undef SHAPE_ARG
698 : #undef GET_ELL
699 : #undef CO_ACCR
|