Line data Source code
1 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3 : !!!!
4 : !!!! MIT License
5 : !!!!
6 : !!!! ParaMonte: plain powerful parallel Monte Carlo library.
7 : !!!!
8 : !!!! Copyright (C) 2012-present, The Computational Data Science Lab
9 : !!!!
10 : !!!! This file is part of the ParaMonte library.
11 : !!!!
12 : !!!! Permission is hereby granted, free of charge, to any person obtaining a
13 : !!!! copy of this software and associated documentation files (the "Software"),
14 : !!!! to deal in the Software without restriction, including without limitation
15 : !!!! the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 : !!!! and/or sell copies of the Software, and to permit persons to whom the
17 : !!!! Software is furnished to do so, subject to the following conditions:
18 : !!!!
19 : !!!! The above copyright notice and this permission notice shall be
20 : !!!! included in all copies or substantial portions of the Software.
21 : !!!!
22 : !!!! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 : !!!! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 : !!!! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 : !!!! IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26 : !!!! DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27 : !!!! OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
28 : !!!! OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 : !!!!
30 : !!!! ACKNOWLEDGMENT
31 : !!!!
32 : !!!! ParaMonte is an honor-ware and its currency is acknowledgment and citations.
33 : !!!! As per the ParaMonte library license agreement terms, if you use any parts of
34 : !!!! this library for any purposes, kindly acknowledge the use of ParaMonte in your
35 : !!!! work (education/research/industry/development/...) by citing the ParaMonte
36 : !!!! library as described on this page:
37 : !!!!
38 : !!!! https://github.com/cdslaborg/paramonte/blob/main/ACKNOWLEDGMENT.md
39 : !!!!
40 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
41 : !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
42 :
43 : !> \brief This module contains the classes and procedures for fitting a Cyclic Geometric distribution.
44 : !> \author Amir Shahmoradi
45 :
46 : ! The functions and procedures contained in this module were originally part of the [Statistics_mod](@ref statistics_mod)
47 : ! module. However, they were moved out to bypass the segmentation fault error with internal functions when the library is
48 : ! compiled and run on Microsoft Windows Subsystem for Linux using the GNU Fortran compiler.
49 :
50 : module GeoCyclicFit_mod
51 :
52 : use Optimization_mod, only: PowellMinimum_type
53 : use Constants_mod, only: IK, RK
54 : implicit none
55 :
56 :
57 : character(len=*), parameter :: MODULE_NAME = "@GeoCyclicFit_mod"
58 :
59 : #if defined OS_IS_WSL
60 : private :: getSumDistSq
61 : integer(IK) :: numTrial_WSL !< This madness bypasses the Microsoft Subsystem for Linux Internal Function call GFortran Segmentation Fault error.
62 : integer(IK) :: maxNumTrial_WSL !< This madness bypasses the Microsoft Subsystem for Linux Internal Function call GFortran Segmentation Fault error.
63 : integer(IK) , allocatable :: SuccessStep_WSL(:) !< This madness bypasses the Microsoft Subsystem for Linux Internal Function call GFortran Segmentation Fault error.
64 : real(RK) , allocatable :: LogCount_WSL(:) !< This madness bypasses the Microsoft Subsystem for Linux Internal Function call GFortran Segmentation Fault error.
65 : #endif
66 :
67 : type :: GeoCyclicFit_type
68 : type(PowellMinimum_type) :: PowellMinimum
69 : contains
70 : procedure, nopass :: fit => fitGeoCyclicLogPDF
71 : end type GeoCyclicFit_type
72 :
73 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
74 :
75 : contains
76 :
77 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78 :
79 : !> \brief
80 : !> Return a fit of the Cyclic Geometric distribution PDF to the input natural logarithm of a sequence of Counts,
81 : !> the `i`th element of which represents the number of successes after `SuccessStep(i)` tries in a Bernoulli trail.
82 : !>
83 : !> \param[in] maxNumTrial : The maximum number of trails possible. After `maxNumTrial` tries,
84 : ! the Geometric distribution restarts from index `1`.
85 : !> \param[in] numTrial : The length of the array `SuccessStep` and `LogCount`.
86 : !> Note that `numTrial < maxNumTrial` must hold.
87 : !> \param[in] SuccessStep : A vector of length `(1:numTrial)` of integers that represent
88 : !> the steps at which the Bernoulli successes occur.
89 : !> \param[in] LogCount : A real-valued vector of length `(1:numTrial)` representing the natural logarithms of the
90 : !> counts of success at the corresponding Bernoulli trials specified by elements of `SuccessStep`.
91 : !>
92 : !> \return
93 : !> `PowellMinimum` : An object of class [PowellMinimum_type](@ref optimization_mod::powellminimum_type) containing
94 : !> the best-fit successProb and the normalization constant of the fit in the vector component `xmin`.
95 : !>
96 : !> \warning
97 : !> Any value of SuccessStep must be an integer numbers between `1` and `maxNumTrial`.
98 : !> The onus is on the user to ensure this condition holds.
99 : !>
100 : !> \todo
101 : !> Update: Amir Shahmoradi, Sunday Nov 29, 2020, 11:19 pm, Dallas, TX
102 : !> The current implementation of the objective function relies on the definitions of module variables.
103 : !> Although inefficient and ugly, this was necessary to resolve the viscous Segmentation Fault error
104 : !> that happens with internal function calls on Windows Subsystem for Linux Ubuntu with GFortran.
105 : !> Once this error of unknown origin is resolved, the external module function `getSumDistSq()`
106 : !> must be converted back to an internal function within [fitGeoCyclicLogPDF](@ref fitgeocycliclogpdf)
107 : !> and subsequently, all module variables must be removed.
108 : !> update (Dec 16, 2020):
109 : !> The source of the error was identified to be a bug in WSL1.
110 : !> This problem is now resolved in WSL2. However, the code will
111 : !> be kept intact for future compatibility with WSL1 and those who still use it.
112 : !>
113 : !> \author
114 : !> Amir Shahmoradi, Monday March 6, 2017, 3:22 pm, ICES, The University of Texas at Austin.
115 3 : function fitGeoCyclicLogPDF(maxNumTrial, numTrial, SuccessStep, LogCount) result(PowellMinimum)
116 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
117 : !DEC$ ATTRIBUTES DLLEXPORT :: fitGeoCyclicLogPDF
118 : #endif
119 : use Optimization_mod, only: PowellMinimum_type
120 : use Constants_mod, only: IK, RK
121 : implicit none
122 : integer(IK) , intent(in) :: maxNumTrial
123 : integer(IK) , intent(in) :: numTrial
124 : integer(IK) , intent(in) :: SuccessStep(numTrial)
125 : real(RK) , intent(in) :: LogCount(numTrial)
126 : type(PowellMinimum_type) :: PowellMinimum
127 :
128 : real(RK) :: BestFitSuccessProbNormFac(2) ! vector of the two parameters
129 : real(RK) , parameter :: SUCCESS_PROB_INIT_GUESS = 0.23_RK
130 : real(RK) , parameter :: FISHER_TRANS_SUCCESS_PROB_INIT_GUESS = atanh(2*(SUCCESS_PROB_INIT_GUESS - 0.5_RK))
131 :
132 : #if defined OS_IS_WSL
133 : numTrial_WSL = numTrial
134 : maxNumTrial_WSL = maxNumTrial
135 : SuccessStep_WSL = SuccessStep
136 : LogCount_WSL = LogCount
137 : #endif
138 :
139 : ! do Fisher transformation to make the limits infinity.
140 3 : BestFitSuccessProbNormFac = [FISHER_TRANS_SUCCESS_PROB_INIT_GUESS, 0._RK] ! LogCount(1)]
141 :
142 : PowellMinimum = PowellMinimum_type ( ndim = 2_IK &
143 : , getFuncMD = getSumDistSq &
144 : , StartVec = BestFitSuccessProbNormFac &
145 3 : )
146 3 : if (PowellMinimum%Err%occurred) return
147 3 : PowellMinimum%xmin(1) = 0.5_RK * tanh(PowellMinimum%xmin(1)) + 0.5_RK ! reverse Fisher-transform
148 :
149 : #if !defined OS_IS_WSL
150 : contains
151 :
152 : !! doxygen has problems digesting the documentation of Fortran internal functions.
153 : !!> \brief
154 : !!>
155 : !!> \param[in] ndim : The length of the input vector `successProbFisherTransNormFac`.
156 : !!> \param[in] successProbFisherTransNormFac : The length of the input vector `successProbFisherTransNormFac`.
157 : !!>
158 : !!> \return
159 : !!> `sumDistSq` : The sum of distances squared.
160 : !!>
161 : !!> \remark
162 : !!> Although `successProbFisherTransNormFac` is a vector on input, it is expected to have a length of one at all times.
163 : !!> This is solely to fullfile the interface restrictions of [PowellMinimum_type](@ref optimization_mod::powellminimum_type).
164 1331 : pure function getSumDistSq(ndim,successProbFisherTransNormFac) result(sumDistSq)
165 3 : use Statistics_mod, only: getLogProbGeoCyclic
166 : !use Constants_mod, only: IK, RK
167 : implicit none
168 : integer(IK) , intent(in) :: ndim
169 : real(RK) , intent(in) :: successProbFisherTransNormFac(ndim)
170 : real(RK) :: sumDistSq
171 : !sumDistSq = sum( (LogCount - getGeoLogPDF(successProb=successProb,seqLen=numTrial) - successProbFisherTransNormFac(2) )**2 )
172 : sumDistSq = sum( ( LogCount & ! LCOV_EXCL_LINE
173 : - getLogProbGeoCyclic ( successProb = 0.5_RK * tanh(successProbFisherTransNormFac(1)) + 0.5_RK & ! reverse Fisher-transform ! LCOV_EXCL_LINE
174 : , maxNumTrial = maxNumTrial & ! LCOV_EXCL_LINE
175 : , numTrial = numTrial & ! LCOV_EXCL_LINE
176 : , SuccessStep = SuccessStep & ! LCOV_EXCL_LINE
177 : ) &
178 1331 : - successProbFisherTransNormFac(2) &
179 : )**2 &
180 52454 : )
181 1331 : end function getSumDistSq
182 : #endif
183 : end function fitGeoCyclicLogPDF
184 :
185 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
186 :
187 : #if defined OS_IS_WSL
188 : !> \brief
189 : !> Return the sum of the distances squared from the current fit.
190 : !>
191 : !> \param[in] ndim : The length of the input vector `successProbFisherTransNormFac`.
192 : !> \param[in] successProbFisherTransNormFac : The length of the input vector `successProbFisherTransNormFac`.
193 : !>
194 : !> \return
195 : !> `sumDistSq` : The sum of distances squared.
196 : !>
197 : !> \remark
198 : !> Although `successProbFisherTransNormFac` is a vector on input, it is expected to have a length of one at all times.
199 : !> This is solely to fullfile the interface restrictions of [PowellMinimum_type](@ref optimization_mod::powellminimum_type).
200 : pure function getSumDistSq(ndim,successProbFisherTransNormFac) result(sumDistSq)
201 : #if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
202 : !DEC$ ATTRIBUTES DLLEXPORT :: getSumDistSq
203 : #endif
204 : use Statistics_mod, only: getLogProbGeoCyclic
205 : use Constants_mod, only: IK, RK
206 : implicit none
207 : integer(IK) , intent(in) :: ndim
208 : real(RK) , intent(in) :: successProbFisherTransNormFac(ndim)
209 : real(RK) :: sumDistSq
210 : !sumDistSq = sum( (LogCount - getGeoLogPDF(successProb=successProb,seqLen=numTrial) - successProbFisherTransNormFac(2) )**2 )
211 : sumDistSq = sum( ( LogCount_WSL & ! LCOV_EXCL_LINE
212 : - getLogProbGeoCyclic ( successProb = 0.5_RK * tanh(successProbFisherTransNormFac(1)) + 0.5_RK & ! reverse Fisher-transform ! LCOV_EXCL_LINE
213 : , maxNumTrial=maxNumTrial_WSL & ! LCOV_EXCL_LINE
214 : , numTrial=numTrial_WSL & ! LCOV_EXCL_LINE
215 : , SuccessStep=SuccessStep_WSL & ! LCOV_EXCL_LINE
216 : ) &
217 : - successProbFisherTransNormFac(2) &
218 : )**2 &
219 : )
220 : end function getSumDistSq
221 : #endif
222 :
223 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
224 :
225 : ! !> \brief
226 : ! !> Return a fit of the Geometric distribution PDF to the input natural logarithm of a sequence of Counts,
227 : ! !> the `i`th element of which represents the number of successes after `SuccessStep(i)` tries in a Bernoulli trail.
228 : ! !>
229 : ! !> \param[in] numTrial : The number of trials. The length of the input vector `LogCount`.
230 : ! !> \param[in] SuccessStep : The vector of trials of length `numTrial` at which the first success happens.
231 : ! !> \param[in] LogCount : A vector of real values representing the natural logarithms of the counts
232 : ! !> of success at each Bernoulli trial, sequentially, from `1` to `numTrial`.
233 : ! !>
234 : ! !> \return
235 : ! !> `PowellMinimum` : An object of class [PowellMinimum_type](@ref optimization_mod::powellminimum_type) containing
236 : ! !> the best-fit successProb and the normalization constant of the fit in the vector component `xmin`.
237 : ! !>
238 : ! !> \author
239 : ! !> Amir Shahmoradi, Monday March 6, 2017, 3:22 pm, ICES, The University of Texas at Austin.
240 : ! function fitGeoLogPDF_old(numTrial, SuccessStep, LogCount) result(PowellMinimum)
241 : !#if INTEL_COMPILER_ENABLED && defined DLL_ENABLED && (OS_IS_WINDOWS || defined OS_IS_DARWIN)
242 : ! !DEC$ ATTRIBUTES DLLEXPORT :: fitGeoLogPDF_old
243 : !#endif
244 : ! use Optimization_mod, only: PowellMinimum_type
245 : ! use Constants_mod, only: IK, RK, POSINF_RK, NEGINF_RK
246 : ! implicit none
247 : ! integer(IK) , intent(in) :: numTrial
248 : ! integer(IK) , intent(in) :: SuccessStep(numTrial)
249 : ! real(RK) , intent(in) :: LogCount(numTrial)
250 : ! type(PowellMinimum_type) :: PowellMinimum
251 : !
252 : ! real(RK) :: BestFitSuccessProbNormFac(2) ! vector of the two parameters
253 : ! real(RK) , parameter :: SUCCESS_PROB_INIT_GUESS = 0.23_RK
254 : ! real(RK) , parameter :: FISHER_TRANS_SUCCESS_PROB_INIT_GUESS = atanh(2*(SUCCESS_PROB_INIT_GUESS - 0.5_RK))
255 : !
256 : ! ! do Fisher transformation to make the limits infinity
257 : ! BestFitSuccessProbNormFac = [FISHER_TRANS_SUCCESS_PROB_INIT_GUESS, LogCount(1)]
258 : !
259 : ! PowellMinimum = PowellMinimum_type ( ndim = 2_IK &
260 : ! , getFuncMD = getSumDistSq &
261 : ! , StartVec = BestFitSuccessProbNormFac &
262 : ! )
263 : ! if (PowellMinimum%Err%occurred) return
264 : ! PowellMinimum%xmin(1) = 0.5_RK * tanh(PowellMinimum%xmin(1)) + 0.5_RK ! reverse Fisher-transform
265 : !
266 : ! contains
267 : !
268 : ! function getSumDistSq(ndim,successProbFisherTransNormFac) result(sumDistSq)
269 : ! !use Constants_mod, only: IK, RK
270 : ! implicit none
271 : ! integer(IK) , intent(in) :: ndim
272 : ! real(RK) , intent(in) :: successProbFisherTransNormFac(ndim)
273 : ! real(RK) :: sumDistSq, successProb
274 : ! successProb = 0.5_RK*tanh(successProbFisherTransNormFac(1)) + 0.5_RK ! reverse Fisher-transform
275 : ! !sumDistSq = sum( (LogCount - getGeoLogPDF(successProb=successProb,seqLen=numTrial) - successProbFisherTransNormFac(2) )**2 )
276 : ! sumDistSq = sum( ( LogCount &
277 : ! - numTrial * successProbFisherTransNormFac(2) &
278 : ! - getLogProbGeo(numTrial = numTrial, SuccessStep = SuccessStep, successProb = successProb) &
279 : ! )**2 &
280 : ! )
281 : ! end function getSumDistSq
282 : !
283 : ! end function fitGeoLogPDF_old
284 :
285 : !%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
286 :
287 : end module GeoCyclicFit_mod ! LCOV_EXCL_LINE
|