# Source code for paramonte._ParaDRAMChain

####################################################################################################################################
####################################################################################################################################
####
####   ParaMonte: plain powerful parallel Monte Carlo library.
####
####   Copyright (C) 2012-present, The Computational Data Science Lab
####
####   This file is part of the ParaMonte library.
####
####   ParaMonte is free software: you can redistribute it and/or modify it
####   under the terms of the GNU Lesser General Public License as published
####   by the Free Software Foundation, version 3 of the License.
####
####   ParaMonte is distributed in the hope that it will be useful,
####   but WITHOUT ANY WARRANTY; without even the implied warranty of
####   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
####   GNU Lesser General Public License for more details.
####
####   You should have received a copy of the GNU Lesser General Public License
####   along with the ParaMonte library. If not, see,
####
####       https://github.com/cdslaborg/paramonte/blob/master/LICENSE
####
####   ACKNOWLEDGMENT
####
####   As per the ParaMonte library license agreement terms,
####   if you use any parts of this library for any purposes,
####   we ask you to acknowledge the use of the ParaMonte library
####   in your work (education/research/industry/development/...)
####   by citing the ParaMonte library as described on this page:
####
####       https://github.com/cdslaborg/paramonte/blob/master/ACKNOWLEDGMENT.md
####
####################################################################################################################################
####################################################################################################################################

import numpy as _np
import typing as _tp
import pandas as _pd

import _paramonte as _pm

# Module-level timer shared by all _ParaDRAMChain instances: every stage of
# _ParaDRAMChain.__init__ brackets its work between _timer.tic(msg=...) and _timer.toc().
# NOTE(review): presumably tic() prints the message and toc() reports the elapsed
# time -- confirm against _pm.utils.Timer.
_timer = _pm.utils.Timer(_methodName=_pm.names.paradram)

####################################################################################################################################
#### _ParaDRAMChain class
####################################################################################################################################

class _Struct:
    """
    Empty attribute container.

    Instances carry no behavior of their own; attributes are attached to them
    dynamically. ``_ParaDRAMChain`` uses instances of this class for its
    ``contents``, ``stats`` and ``plot`` components.
    """
    pass

class _ParaDRAMChain:
    """

    .. py:class:: _ParaDRAMChain

    This is the _ParaDRAMChain class for generating instances
    of ParaDRAM sample/chain. The ParaDRAM class's ``readSample()`` or
    ``readChain()`` or ``readMarkovChain()`` methods return an object or
    a list of objects of class ``_ParaDRAMChain``.

        **Parameters**

            file
                full path to the file containing the sample/chain.

            delimiter
                the delimiter used in the sample/chain file, which
                must be provided by the user.

            parseContents
                If set to ``True``, the contents of the file will be parsed and
                stored in a component of the object named ``contents``.
                The default value is True.

            markovChainRequested
                boolean value indicating whether the full Markov Chain
                has to be generated from the potentially-weighted sample
                in the input file. If True, each row of the file is replicated
                according to its weight, so that every row of the resulting
                output dataframe represents a single state of the chain
                (that is, it effectively has a weight of 1).
                The default value is False.

            mpiDisabled
                If set to ``False``, a note reporting ``ndim`` and ``count``
                is displayed once the file has been read.
                The default value is True.

        **Attributes**

            file
                full path to the file containing the sample/chain.

            delimiter
                the delimiter used in the sample/chain file, which
                must be provided by the user.

            ndim
                number of dimensions of the domain of the objective
                function from which the sample has been drawn.

            count
                number of points (states) in the sample/chain file.
                This is essentially the number of rows in the file
                minus one (representing the header line). If the Markov
                chain was requested and the file was compact, it is the
                number of rows of the expanded chain.

            [df]
                if the input file contents is structured in a format that
                could be read as a dataframe, then the contents of the file
                will be stored in the form of a pandas-library DataFrame
                in this property (hence called ``df``).

            [contents]
                if the input file contents is structured in the form of columns,
                then a property named ``contents`` is also added to the object.
                Each component of contents will be named via the header of the
                file and will contain data from the corresponding column of the
                file.

            stats
                a structure with the components ``cormat`` (Pearson correlation
                matrix of the variables), ``covmat`` (covariance matrix of the
                variables), ``maxLogFunc`` and ``acf`` (autocorrelations of
                ``SampleLogFunc`` and of the variables).

            [plot]
                a structure with the plotting tools ``hist``, ``line``,
                ``scatter``, ``density`` and ``grid``. The section of the code
                adding it is guarded by the ``PMVIS_ENABLED`` preprocessor flag.

        **Returns**

            outputChain
                an object of class ``_ParaDRAMChain``

    ----------------------------------------------------------------------
    """

    def __init__( self
                , file
                , delimiter
                , parseContents = True
                , markovChainRequested = False
                , mpiDisabled = True
                ):

        ############################################################################################################################
        #### data
        ############################################################################################################################

        self.file = file
        self.delimiter = delimiter

        _timer.tic( msg = "reading file contents... " )

        self.df = _pd.read_csv  ( self.file
                                , delimiter = self.delimiter
                                , header = 0
                                )

        # every column located after "SampleLogFunc" holds one variable of the sampled state
        self._offset = list(self.df.columns).index("SampleLogFunc") + 1 # index of the first variable
        self.ndim   = len(self.df.columns) - self._offset
        self.count  = len(self.df)

        if markovChainRequested:

            # NOTE(review): the weights are read positionally from the column immediately
            # preceding "SampleLogFunc". If "SampleLogFunc" were the first column of the file,
            # this index would wrap around to the last column -- confirm that only chain
            # files (which carry a weight column) reach this branch.
            weights = _np.asarray(self.df.iloc[:,self._offset-2].values).astype(_np.int64)

            # 64-bit accumulation avoids overflow for long chains; sum() of an empty file is 0
            totalWeight = int(weights.sum())

            if totalWeight != self.count: # it is indeed a compact chain
                columns = self.df.columns
                # replicate row i exactly weights[i] times, preserving the file order;
                # as before, the expanded chain is stored as floating-point numbers
                markovChain = _np.repeat( _np.asarray(self.df.values, dtype=_np.float64)
                                        , weights
                                        , axis = 0
                                        )
                self.df = _pd.DataFrame(markovChain, columns = columns)
                self.count = len(self.df)

        _timer.toc()

        if not mpiDisabled:
            _pm.note( msg = "ndim = " + str(self.ndim) + ", count = " + str(self.count)
                    , methodName = _pm.names.paradram
                    , marginTop = 0
                    , marginBot = 1
                    , end = ""
                    )

        # set dynamic properties

        if parseContents:
            _timer.tic( msg = "parsing file contents... " )
            self.contents = _Struct()
            # expose each column of the dataframe as an attribute named after its header
            for colName in self.df.columns:
                setattr ( self.contents, colName, self.df[colName] )
            _timer.toc()

        ############################################################################################################################
        #### statistics
        ############################################################################################################################

        self.stats = _Struct()

        # positional indices of the variable columns (SampleLogFunc and everything before it excluded)
        variableColumns = range(self._offset,self._offset+self.ndim)

        # add chain cormat

        self.stats.cormat = _pm.stats.CorMat( dataFrame     = self.df
                                            , columns       = variableColumns
                                            , method        = "pearson"
                                            )

        _timer.tic( msg = "computing sample correlation matrix... " )
        self.stats.cormat()
        _timer.toc()

        # add chain covmat

        self.stats.covmat = _pm.stats.CovMat( dataFrame     = self.df
                                            , columns       = variableColumns
                                            )

        _timer.tic( msg = "computing sample covariance matrix... " )
        self.stats.covmat()
        _timer.toc()

        # the returned object's ``idrow`` component is used below to mark the target of the density plot
        self.stats.maxLogFunc = _pm.stats.getMaxLogFunc(dataFrame = self.df)

        # add chain autocorrelation (starting one column earlier, so that SampleLogFunc is included)

        self.stats.acf = _pm.stats.AutoCorr ( dataFrame     = self.df
                                            , columns       = range(self._offset-1,self._offset+self.ndim)
                                            )

        _timer.tic( msg = "computing autocorrelations... " )
        self.stats.acf()
        _timer.toc()

#!DEC$ ifdef PMVIS_ENABLED

        ############################################################################################################################
        #### graphics
        ############################################################################################################################

        _timer.tic( msg = "adding graphics tools... " )

        self.plot = _Struct()

        # names of the variable columns, shared by the histogram, line and scatter plots
        variableNames = self.df.columns[self._offset:]

        # add HistPlot

        self.plot.hist = _pm.vis.HistPlot   ( dataFrame = self.df
                                            , columns = variableNames
                                            )

        # add LinePlot

        self.plot.line = _pm.vis.LinePlot   ( dataFrame = self.df
                                            , ycolumns = variableNames
                                            , ccolumns = "SampleLogFunc"
                                            , lc_kws =  {
                                                        #"linewidth":0.75,
                                                        #"cmap":"viridis",
                                                        "cmap":"autumn",
                                                        #"alpha":0.5,
                                                        }
                                            , colorbar_kws =    {
                                                                "extend":"neither",
                                                                "orientation":"vertical",
                                                                #"spacing":"uniform",
                                                                }
                                            #, legend_kws = None
                                            )

        # add ScatterPlot

        self.plot.scatter = _pm.vis.ScatterPlot ( dataFrame = self.df
                                                , ycolumns = variableNames
                                                , ccolumns = "SampleLogFunc"
                                                #, scatter_kws = {}
                                                , colorbar_kws =    {
                                                                    "extend":"neither",
                                                                    "orientation":"vertical",
                                                                    #"spacing":"uniform",
                                                                    }
                                                #, legend_kws = None
                                                )

        # add DensityMapPlot

        xindex = self._offset
        if self.ndim == 1:
            # there is no second variable: plot the only variable against SampleLogFunc instead
            yindex = self._offset - 1
        else:
            yindex = self._offset + 1

        self.plot.density = _pm.vis.DensityMapPlot  ( dataFrame = self.df
                                                    , xcolumn = xindex
                                                    , ycolumn = yindex
                                                    )

        # mark the state with the largest SampleLogFunc as the target of the density plot
        idrowMax = self.stats.maxLogFunc.idrow
        self.plot.density.target.__init__   ( value = [ self.df.iat[idrowMax,xindex], self.df.iat[idrowMax,yindex] ]
                                            , scatter_kws = {"label":"maxLogFunc"}
                                            )

        # add GridPlot (limited to the first three variables at most)

        endColindex = min( self._offset+3, self._offset+self.ndim )
        self.plot.grid = _pm.vis.GridPlot   ( dataFrame = self.df
                                            , columns = self.df.columns[self._offset:endColindex]
                                            , scatterplot_kws = {"ccolumns": "SampleLogFunc"}
                                            , _methodName = _pm.names.paradram
                                            )

        # add ScatterLinePlot

#       self.plot._scatterline = _pm.vis.ScatterLinePlot( dataFrame = self.df
#                                                       , ycolumns = self.df.columns[self._offset:]
#                                                       , lccolumns = "SampleLogFunc"
#                                                       , lc_kws =  {
#                                                                   #"linewidth":0.75,
#                                                                   #"cmap":"viridis",
#                                                                   "cmap":"autumn",
#                                                                   #"alpha":0.5,
#                                                                   }
#                                                       #, scatter_kws = {}
#                                                       , colorbar_kws =    {
#                                                                           "extend":"neither",
#                                                                           "orientation":"vertical",
#                                                                           #"spacing":"uniform",
#                                                                           }
#                                                       #, legend_kws = None
#                                                       )

        _timer.toc()

    ################################################################################################################################

#!DEC$ endif