# Source code for paramonte._ParaDRAMChain

####################################################################################################################################
####################################################################################################################################
####
####   ParaMonte: plain powerful parallel Monte Carlo library.
####
####   Copyright (C) 2012-present, The Computational Data Science Lab
####
####   This file is part of the ParaMonte library.
####
####   ParaMonte is free software: you can redistribute it and/or modify it
####   under the terms of the GNU Lesser General Public License as published
####   by the Free Software Foundation, version 3 of the License.
####
####   ParaMonte is distributed in the hope that it will be useful,
####   but WITHOUT ANY WARRANTY; without even the implied warranty of
####   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
####   GNU Lesser General Public License for more details.
####
####   You should have received a copy of the GNU Lesser General Public License
####   along with the ParaMonte library. If not, see,
####
####       https://github.com/cdslaborg/paramonte/blob/master/LICENSE
####
####   ACKNOWLEDGMENT
####
####   As per the ParaMonte library license agreement terms,
####   if you use any parts of this library for any purposes,
####   we ask you to acknowledge the use of the ParaMonte library
####   in your work (education/research/industry/development/...)
####   by citing the ParaMonte library as described on this page:
####
####       https://github.com/cdslaborg/paramonte/blob/master/ACKNOWLEDGMENT.md
####
####################################################################################################################################
####################################################################################################################################

import numpy as _np
import typing as _tp
import pandas as _pd

import _paramonte as _pm

# Module-level timer shared by every _ParaDRAMChain instance: its tic()/toc()
# methods bracket each processing stage of _ParaDRAMChain.__init__().
_timer = _pm.utils.Timer(_methodName=_pm.names.paradram)

####################################################################################################################################
#### _ParaDRAMChain class
####################################################################################################################################

class _Struct:
    """Empty attribute container; instances are populated dynamically via attribute assignment."""
    pass
class _ParaDRAMChain:
    """

    .. py:class:: _ParaDRAMChain

    This is the _ParaDRAMChain class for generating instances
    of ParaDRAM sample/chain. The ParaDRAM class's ``readSample()``
    or ``readChain()`` or ``readMarkovChain()`` methods return an
    object or a list of objects of class ``_ParaDRAMChain``.

        **Parameters**

            file
                full path to the file containing the sample/chain.

            delimiter
                the delimiter used in the sample/chain file, which
                must be provided by the user.

            parseContents
                If set to ``True``, the contents of the file will be
                parsed and stored in a component of the object named
                ``contents``. The default value is ``True``.

            markovChainRequested
                boolean value indicating whether the full Markov Chain
                has to be generated from the potentially-weighted sample
                in the input file. If ``True``, each sampled state in the
                resulting output dataframe is guaranteed to have a weight
                of 1. The default value is ``False``.

            mpiDisabled
                If set to ``False``, a note reporting ``ndim`` and
                ``count`` is displayed once the file has been read.
                The default value is ``True`` (no note is displayed).

        **Attributes**

            file
                full path to the file containing the sample/chain.

            delimiter
                the delimiter used in the sample/chain file, which
                must be provided by the user.

            ndim
                number of dimensions of the domain of the objective
                function from which the sample has been drawn. It is
                computed as the number of columns that follow the
                ``SampleLogFunc`` column of the file.

            count
                number of points (states) in the sample/chain. This is
                essentially the number of rows in the file minus one
                (representing the header line). If the Markov chain was
                requested and the file held a compact (weighted) chain,
                it is the number of rows of the expanded chain instead.

            [df]
                if the input file contents is structured in a format
                that could be read as a dataframe, then the contents
                of the file will be stored in the form of a pandas-library
                DataFrame in this property (hence called ``df``).

            [contents]
                if the input file contents is structured in the form of
                columns, then a property named ``contents`` is also added
                to the object. Each component of contents will be named
                via the header of the file and will contain data from
                the corresponding column of the file.

            stats
                container holding the ``cormat``, ``covmat``,
                ``maxLogFunc`` and ``acf`` statistics of the chain.

            plot
                container holding the ``hist``, ``line``, ``scatter``,
                ``density`` and ``grid`` visualization tools.

        **Returns**

            outputChain
                an object of class ``_ParaDRAMChain``

    ----------------------------------------------------------------------
    """

    def __init__(self,
                 file,
                 delimiter,
                 parseContents=True,
                 markovChainRequested=False,
                 mpiDisabled=True):

        ############################################################################################################################
        #### data
        ############################################################################################################################

        self.file = file
        self.delimiter = delimiter

        _timer.tic( msg = "reading file contents... " )

        self.df = _pd.read_csv(self.file, delimiter=self.delimiter, header=0)

        # Index of the first sampled variable: the column right after "SampleLogFunc".
        self._offset = list(self.df.columns).index("SampleLogFunc") + 1
        self.ndim = len(self.df.columns) - self._offset
        self.count = len(self.df.index)

        if markovChainRequested:
            # The weight column is taken to be the one immediately preceding
            # "SampleLogFunc" (NOTE(review): confirm against the chain file layout).
            self.df = self._expandCompactChain(self.df, self._offset - 2)
            self.count = len(self.df.index)

        _timer.toc()

        if not mpiDisabled:
            _pm.note( msg = "ndim = " + str(self.ndim) + ", count = " + str(self.count)
                    , methodName = _pm.names.paradram
                    , marginTop = 0
                    , marginBot = 1
                    , end = ""
                    )

        # set dynamic properties: expose every column as an attribute of ``contents``

        if parseContents:
            _timer.tic( msg = "parsing file contents... " )
            self.contents = _Struct()
            for colName in self.df.columns:
                setattr(self.contents, colName, self.df[colName])
            _timer.toc()

        ############################################################################################################################
        #### statistics
        ############################################################################################################################

        self.stats = _Struct()

        # add chain cormat (sampled variables only)

        self.stats.cormat = _pm.stats.CorMat( dataFrame = self.df
                                            , columns = range(self._offset, self._offset + self.ndim)
                                            , method = "pearson"
                                            )
        _timer.tic( msg = "computing sample correlation matrix... " )
        self.stats.cormat()
        _timer.toc()

        # add chain covmat (sampled variables only)

        self.stats.covmat = _pm.stats.CovMat( dataFrame = self.df
                                            , columns = range(self._offset, self._offset + self.ndim)
                                            )
        _timer.tic( msg = "computing sample covariance matrix... " )
        self.stats.covmat()
        _timer.toc()

        self.stats.maxLogFunc = _pm.stats.getMaxLogFunc(dataFrame = self.df)

        # add chain autocorrelation (the range starts one column earlier, so it
        # also covers the "SampleLogFunc" column)

        self.stats.acf = _pm.stats.AutoCorr( dataFrame = self.df
                                           , columns = range(self._offset - 1, self._offset + self.ndim)
                                           )
        _timer.tic( msg = "computing autocorrelations... " )
        self.stats.acf()
        _timer.toc()

        # NOTE(review): the "#!DEC$" lines below look like build-time preprocessor
        # directives guarding the graphics section; they are preserved verbatim.
        # Confirm whether the build requires them at a specific column.

        #!DEC$ ifdef PMVIS_ENABLED

        ############################################################################################################################
        #### graphics
        ############################################################################################################################

        _timer.tic( msg = "adding graphics tools... " )

        variableNames = self.df.columns[self._offset:]
        colorbarSettings = {"extend": "neither", "orientation": "vertical"}

        # add HistPlot

        self.plot = _Struct()
        self.plot.hist = _pm.vis.HistPlot( dataFrame = self.df
                                         , columns = variableNames
                                         )

        # add LinePlot

        self.plot.line = _pm.vis.LinePlot( dataFrame = self.df
                                         , ycolumns = variableNames
                                         , ccolumns = "SampleLogFunc"
                                         , lc_kws = {"cmap": "autumn"}
                                         , colorbar_kws = dict(colorbarSettings)
                                         )

        # add ScatterPlot

        self.plot.scatter = _pm.vis.ScatterPlot( dataFrame = self.df
                                               , ycolumns = variableNames
                                               , ccolumns = "SampleLogFunc"
                                               , colorbar_kws = dict(colorbarSettings)
                                               )

        # add DensityMapPlot: first two variables, or (for a one-dimensional
        # chain) the single variable against the column preceding it.

        xindex = self._offset
        yindex = self._offset + 1
        if self.ndim == 1:
            xindex, yindex = yindex - 1, xindex - 1
        self.plot.density = _pm.vis.DensityMapPlot( dataFrame = self.df
                                                  , xcolumn = xindex
                                                  , ycolumn = yindex
                                                  )
        # Re-initialize the plot target so that it marks the maxLogFunc state.
        idrowMax = self.stats.maxLogFunc.idrow
        self.plot.density.target.__init__( value = [ self.df.iat[idrowMax, xindex], self.df.iat[idrowMax, yindex] ]
                                         , scatter_kws = {"label":"maxLogFunc"}
                                         )

        # add GridPlot (limited to the first three variables at most)

        endColindex = min(self._offset + 3, self._offset + self.ndim)
        self.plot.grid = _pm.vis.GridPlot( dataFrame = self.df
                                         , columns = self.df.columns[self._offset:endColindex]
                                         , scatterplot_kws = {"ccolumns": "SampleLogFunc"}
                                         , _methodName = _pm.names.paradram
                                         )

        _timer.toc()

        ################################################################################################################################
        #!DEC$ endif

    @staticmethod
    def _expandCompactChain(dataFrame, weightColumnIndex):
        """
        Return the verbose (unit-weight) form of a compact, weighted chain.

        Each row of ``dataFrame`` is repeated as many times as the integer
        weight stored in the column at position ``weightColumnIndex``.

            **Parameters**

                dataFrame
                    pandas DataFrame holding the chain, one state per row.

                weightColumnIndex
                    positional index of the column holding the weights.

            **Returns**

                the input ``dataFrame`` itself when there is nothing to
                expand (negative column index, no rows, or weights that
                already sum to the number of rows); otherwise a new
                float-valued DataFrame with the same column labels.
        """
        nrow = len(dataFrame.index)
        # A negative index would silently wrap around to the last column,
        # and an empty chain has nothing to expand.
        if weightColumnIndex < 0 or nrow == 0:
            return dataFrame
        # 64-bit weights so that the total cannot overflow for very long chains.
        weights = dataFrame.iloc[:, weightColumnIndex].values.astype(_np.int64)
        if int(weights.sum()) == nrow:
            return dataFrame  # already a verbose chain
        expanded = _np.repeat(dataFrame.values.astype(_np.float64), weights, axis=0)
        return _pd.DataFrame(expanded, columns=dataFrame.columns)