Source code for ipca.ipca

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import GroupKFold
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score
from joblib import Parallel, delayed
from numba import jit
import pandas as pd
import numpy as np
import progressbar
import warnings

class InstrumentedPCA(BaseEstimator):
    """This class implements the IPCA algorithm of Kelly, Pruitt, Su (2017).

    Parameters
    ----------
    n_factors : int, default=1
        The total number of factors to estimate. Note, the number of
        estimated factors is automatically reduced by the number of
        pre-specified factors. For example, if n_factors = 2 and one
        pre-specified factor is passed, then InstrumentedPCA will estimate
        one factor in addition to the pre-specified factor.
    intercept : boolean, default=False
        Determines whether the model is estimated with or without an
        intercept
    max_iter : int, default=10000
        Maximum number of alternating least squares updates before the
        estimation is stopped
    iter_tol : float, default=10e-6
        Tolerance threshold for stopping the alternating least squares
        procedure
    alpha : scalar
        Regularizing constant for Gamma estimation. If this is set to zero
        the estimation defaults to non-regularized.
    l1_ratio : scalar
        Ratio of l1 and l2 penalties for the elastic net Gamma fit.
    n_jobs : scalar
        Number of jobs for the F step estimation in ALS. If set to one, no
        parallelization is done.
    backend : str
        Label for the joblib backend used for the F step in ALS
    """

    def __init__(self, n_factors=1, intercept=False, max_iter=10000,
                 iter_tol=10e-6, alpha=0., l1_ratio=1., n_jobs=1,
                 backend="loky"):

        # paranoid parameter checking to make it easier for users to know
        # when they have gone awry and to make it safe to assume some
        # variables can only have certain settings
        if not isinstance(n_factors, int) or n_factors < 1:
            raise ValueError('n_factors must be an int greater/equal 1.')
        if not isinstance(intercept, bool):
            raise NotImplementedError('intercept must be boolean')
        if not isinstance(iter_tol, float) or iter_tol >= 1:
            raise ValueError('Iteration tolerance must be smaller than 1.')
        if l1_ratio > 1. or l1_ratio < 0.:
            raise ValueError("l1_ratio must be between 0 and 1")
        if alpha < 0.:
            raise ValueError("alpha must be greater than or equal to 0")

        # Save parameters to the object
        params = locals()
        for k, v in params.items():
            if k != 'self':
                setattr(self, k, v)
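
    # Illustrative sketch (not part of the original module): constructing
    # the estimator. All parameter names come from __init__ above; the
    # chosen values are arbitrary assumptions for demonstration.
    @staticmethod
    def _example_init():
        """Return a two-factor IPCA estimator with an intercept."""
        # two latent factors plus an intercept column appended to Gamma
        return InstrumentedPCA(n_factors=2, intercept=True,
                               max_iter=5000, iter_tol=1e-5)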
    def fit(self, X, y, indices=None, PSF=None, Gamma=None, Factors=None,
            data_type="portfolio", label_ind=False, **kwargs):
        """Fits the regressor to the data using an alternating least
        squares scheme.

        Parameters
        ----------
        X : numpy array or pandas DataFrame
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.

            If given as a DataFrame, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        y : numpy array or pandas Series
            dependent variable where indices correspond to those in X

            If given as a Series, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        indices : numpy array, optional
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)

            The panel may be unbalanced. The number of unique entities is
            n_samples, the number of unique dates is T, and the number of
            characteristics used as instruments is L.
        PSF : numpy array, optional
            Set of pre-specified factors as matrix of dimension (M, T)
        Gamma : numpy array, optional
            If provided, starting values for Gamma (see Notes)
        Factors : numpy array, optional
            If provided, starting values for Factors (see Notes)
        data_type : str
            label for data-type used for ALS estimation, one of the
            following:

            1. panel

                ALS uses the untransformed X and y for the estimation.
                This is currently marginally slower than the portfolio
                estimation but is necessary when performing regularized
                estimation (alpha > 0).

            2. portfolio

                ALS uses a matrix of characteristic-weighted portfolios (Q)
                as well as a matrix of weights (W) and a count of
                non-missing observations for each time period (val_obs)
                for the estimation.  See _build_portfolio for details on
                how these variables are formed from the initial X and y.

                Currently, the bootstrap procedure is only implemented in
                terms of the portfolio data_type.
        label_ind : bool
            currently unused by fit; retained for interface consistency

        Returns
        -------
        self

        Notes
        -----
        Updates InstrumentedPCA instances to include param estimates:

        Gamma : numpy array
            Array with dimensions (L, n_factors) containing the mapping
            between characteristics and factor loadings. If there are M
            many pre-specified factors in the model then the matrix
            returned is of dimension (L, (n_factors+M)). If an intercept is
            included in the model, its loadings are returned in the last
            column of Gamma.
        Factors : numpy array
            Array with dimensions (n_factors, T) containing the estimated
            factors. If pre-specified factors were passed the returned
            array is of dimension ((n_factors - M), T), corresponding to
            the n_factors - M many factors estimated on top of the
            pre-specified ones.
        """

        # handle input
        X, y, indices, metad = _prep_input(X, y, indices)
        N, L, T = metad["N"], metad["L"], metad["T"]

        # set data_type to panel if doing regularized estimation
        if self.alpha > 0.:
            data_type = "panel"

        # Handle pre-specified factors
        if PSF is not None:
            if np.size(PSF, axis=1) != np.size(metad["dates"]):
                raise ValueError("Number of PSF observations must match "
                                 "the number of unique dates")
            self.has_PSF = True
        else:
            self.has_PSF = False

        if self.has_PSF:
            if np.size(PSF, axis=0) == self.n_factors:
                print("Note: The number of factors (n_factors) to be "
                      "estimated matches the number of pre-specified "
                      "factors. No additional factors will be estimated. "
                      "To estimate additional factors increase n_factors.")

        # Treat the intercept as if it were a pre-specified factor
        if self.intercept:
            self.n_factors_eff = self.n_factors + 1
            if PSF is not None:
                PSF = np.concatenate((PSF, np.ones((1, T))), axis=0)
            else:
                PSF = np.ones((1, T))
        else:
            self.n_factors_eff = self.n_factors

        # Check that enough features were provided
        if np.size(X, axis=1) < self.n_factors_eff:
            raise ValueError("The number of factors requested exceeds the "
                             "number of features")

        # Determine fit case - if intercept or PSF or both, use PSFcase
        # Note: PSFcase, in contrast to has_PSF, only indicates that the
        # IPCA fitting is carried out as if PSF were passed, even if only
        # an intercept was passed.
        self.PSFcase = True if self.has_PSF or self.intercept else False

        # store data
        self.X, self.y, self.indices, self.PSF = X, y, indices, PSF
        Q, W, val_obs = _build_portfolio(X, y, indices, metad)
        self.Q, self.W, self.val_obs = Q, W, val_obs
        self.metad = metad

        # Run IPCA
        Gamma, Factors = self._fit_ipca(X=X, y=y, indices=indices, Q=Q,
                                        W=W, val_obs=val_obs, PSF=PSF,
                                        Gamma=Gamma, Factors=Factors,
                                        data_type=data_type, **kwargs)

        # Store estimates; note that PSF already includes the intercept
        # row appended above, so no further augmentation is needed here
        if self.PSFcase:
            if Factors is not None:
                Factors = np.concatenate((Factors, PSF), axis=0)
            else:
                Factors = PSF
        self.Gamma, self.Factors = Gamma, Factors

        return self
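
    # Illustrative sketch (not part of the original module): fitting on a
    # tiny synthetic panel. The input layout (entity/date MultiIndex, one
    # column per characteristic) follows the fit() docstring above; panel
    # sizes and the seed are arbitrary assumptions.
    @staticmethod
    def _example_fit():
        rng = np.random.default_rng(0)
        n, t, l = 20, 12, 3
        idx = pd.MultiIndex.from_product([range(n), range(t)],
                                         names=["id", "date"])
        X = pd.DataFrame(rng.standard_normal((n * t, l)), index=idx,
                         columns=["c1", "c2", "c3"])
        y = pd.Series(rng.standard_normal(n * t), index=idx)
        mod = InstrumentedPCA(n_factors=1, intercept=False).fit(X, y)
        # labeled estimates: Gamma indexed by characteristic, Factors by date
        Gamma, Factors = mod.get_factors(label_ind=True)
        return mod, Gamma, Factors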
    def get_factors(self, label_ind=False):
        """Returns a tuple containing Gamma and Factors

        Parameters
        ----------
        label_ind : bool
            if provided, we return the factors as pandas DataFrames with
            index info applied

        Returns
        -------
        tuple
            containing Gamma and Factors
        """

        Gamma, Factors = self.Gamma.copy(), self.Factors.copy()
        if label_ind:
            Gamma = pd.DataFrame(Gamma, index=self.metad["chars"])
            Factors = pd.DataFrame(Factors, columns=self.metad["dates"])

        return Gamma, Factors
    def fit_path(self, X, y, indices=None, PSF=None, alpha_l=None,
                 n_splits=10, split_method=GroupKFold, n_jobs=1,
                 backend="loky", **kwargs):
        """Fit a path of elastic net fits for various regularizing constants

        Parameters
        ----------
        X : numpy array or pandas DataFrame
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.

            If given as a DataFrame, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        y : numpy array or pandas Series
            dependent variable where indices correspond to those in X

            If given as a Series, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        indices : numpy array, optional
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)

            The panel may be unbalanced. The number of unique entities is
            n_samples, the number of unique dates is T, and the number of
            characteristics used as instruments is L.
        PSF : numpy array, optional
            Set of pre-specified factors as matrix of dimension (M, T)
        alpha_l : iterable, optional
            list of regularizing constants to use for the path
        n_splits : scalar
            number of CV partitions
        split_method : sklearn cross-validation generator
            factory method to generate CV partitions
        n_jobs : scalar
            number of jobs for parallel CV estimation
        backend : str
            label for the joblib backend

        Returns
        -------
        cvmse : numpy matrix
            array of dim (P x (C + 1)) where P is the number of reg
            constants and C is the number of CV partitions; the first
            column contains the alphas
        """

        # handle input
        X, y, indices, metad = _prep_input(X, y, indices)

        # handle data type; since we are doing regularized estimation,
        # only the panel fit makes sense here
        if "data_type" in kwargs:
            data_type = kwargs.pop("data_type")
        else:
            data_type = "panel"
        if data_type == "portfolio":
            raise ValueError("Unsupported data_type for fit_path: "
                             "'portfolio'. Regularized estimation is only "
                             "implemented for the 'panel' data_type "
                             "currently")

        # init alphas
        if alpha_l is None:
            alpha_l = np.linspace(0.0, 1., 100)

        # run cross-validation
        if n_jobs > 1:
            cvmse = Parallel(n_jobs=n_jobs, backend=backend)(
                delayed(_fit_cv)(
                    self, X, y, indices, PSF, n_splits, split_method,
                    alpha, data_type=data_type, **kwargs)
                for alpha in alpha_l)
        else:
            cvmse = [_fit_cv(self, X, y, indices, PSF, n_splits,
                             split_method, alpha, data_type=data_type,
                             **kwargs)
                     for alpha in alpha_l]
        cvmse = np.stack(cvmse)
        cvmse = np.hstack((alpha_l[:, None], cvmse))

        return cvmse
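
    # Illustrative sketch (not part of the original module): tracing a
    # short regularization path with grouped CV. The alpha grid is an
    # arbitrary assumption; X and y are panel inputs as in _example_fit.
    @staticmethod
    def _example_fit_path(X, y):
        mod = InstrumentedPCA(n_factors=1)
        cvmse = mod.fit_path(X, y, alpha_l=np.linspace(0., 0.1, 5),
                             n_splits=5)
        # column 0 holds the alphas, the remaining columns per-fold MSEs
        return cvmse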
    def predict(self, X=None, indices=None, W=None, mean_factor=False,
                data_type="panel", label_ind=False):
        """wrapper around the different data-type predict methods

        Parameters
        ----------
        X : numpy array or pandas DataFrame, optional
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.

            If given as a DataFrame, we assume that it contains a MultiIndex
            mapping to each entity-time pair.  If None, we use the values
            associated with the current model.
        indices : numpy array, optional
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)

            If None, we use the values associated with the current model.
        W : numpy array, optional
            portfolio weight matrix of dimension (L, L, T).  If None, we
            use the values associated with the current model.
        mean_factor : boolean
            If true, the estimated factors are averaged in the time-series
            before prediction.
        data_type : str
            label for data-type used for prediction, one of the following:

            1. panel

                Uses the untransformed X for the prediction.

            2. portfolio

                Uses the matrix of weights (W) for the prediction.  See
                _build_portfolio for details on how these variables are
                formed from the initial X and y.
        label_ind : bool
            whether to apply the indices to the fitted values and return
            pandas variants

        Returns
        -------
        numpy array or pandas DataFrame/Series
            The exact value returned depends on two things:

            1. data_type

                a. If the panel data_type is specified, this will be a
                   series of values for the panel ys
                b. If the portfolio data_type is specified, this will be a
                   matrix of predicted characteristic-formed portfolio Qs

            2. label_ind

                If label_ind is True, we return pandas variants of the
                predicted values.  If not, we return the underlying numpy
                arrays.
        """

        if data_type == "panel":

            if X is None:
                X, indices, metad = self.X, self.indices, self.metad
            else:
                X, y, indices, metad = _prep_input(X, None, indices)
            N, L, T = metad["N"], metad["L"], metad["T"]

            pred = self.predict_panel(X, indices, T, mean_factor)

            if label_ind:
                pred = pd.DataFrame(pred, columns=["yhat"])
                ind = pd.DataFrame(indices, columns=["ids", "dates"])
                # join the index columns and fitted values side by side
                pred = pd.concat([ind, pred], axis=1).set_index(["ids",
                                                                 "dates"])

        elif data_type == "portfolio":

            if W is not None:
                # labels (if requested) come from the fitted panel
                metad = self.metad
                L = W.shape[0]
                T = W.shape[2]
            elif X is not None:
                X, y, indices, metad = _prep_input(X, None, indices)
                L, T = metad["L"], metad["T"]
                Q, W, val_obs = _build_portfolio(X, None, indices, metad)
            elif hasattr(self, "W"):
                metad = self.metad
                W = self.W
                L = W.shape[0]
                T = W.shape[2]
            else:
                X, indices, metad = self.X, self.indices, self.metad
                N, L, T = metad["N"], metad["L"], metad["T"]
                Q, W, val_obs = _build_portfolio(X, None, indices, metad)

            pred = self.predict_portfolio(W, L, T, mean_factor)

            if label_ind:
                pred = pd.DataFrame(pred, index=metad["chars"],
                                    columns=metad["dates"])

        else:
            raise ValueError("Unsupported data_type: %s" % data_type)

        return pred
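
    # Illustrative sketch (not part of the original module): fitted values
    # from a fitted model, assuming `mod` was produced by fit() above.
    @staticmethod
    def _example_predict(mod):
        # in-sample panel fit as a labeled pandas object
        yhat = mod.predict(label_ind=True)
        # portfolio-level fit, averaging the factors over time
        qhat = mod.predict(data_type="portfolio", mean_factor=True)
        return yhat, qhat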
    def predict_panel(self, X, indices, T, mean_factor=False):
        """Predicts fitted values for a previously fitted regressor +
        panel data

        Parameters
        ----------
        X : numpy array
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.
        indices : numpy array
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)
        T : scalar
            number of time periods in X
        mean_factor : boolean
            If true, the estimated factors are averaged in the time-series
            before prediction.

        Returns
        -------
        ypred : numpy array
            The length of the returned array matches the length of data.
            A nan will be returned if there is missing chars information.
        """

        mean_Factors = np.mean(self.Factors, axis=1).reshape((-1, 1))

        if mean_factor:
            ypred = np.squeeze(X.dot(self.Gamma).dot(mean_Factors))
        elif T != self.Factors.shape[1]:
            raise ValueError("If mean_factor isn't used, the date shape "
                             "must align with the Factors shape")
        else:
            ypred = np.full((X.shape[0]), np.nan)
            for t in range(T):
                ix = (indices[:, 1] == t)
                ypred[ix] = np.squeeze(X[ix, :].dot(self.Gamma)
                                       .dot(self.Factors[:, t]))

        return ypred
    def predict_portfolio(self, W, L, T, mean_factor=False):
        """Predicts fitted values for a previously fitted regressor +
        portfolios

        Parameters
        ----------
        W : numpy array
            portfolio weight matrix of dimension (L, L, T)
        L : scalar
            number of characteristics
        T : scalar
            number of time periods
        mean_factor : boolean
            If true, the estimated factors are averaged in the time-series
            before prediction.

        Returns
        -------
        Qpred : numpy array
            Same dimensions as the characteristic-formed portfolios (Q)
        """

        if not mean_factor and T != self.Factors.shape[1]:
            raise ValueError("If mean_factor isn't used, the date shape "
                             "must align with the Factors shape")
        if mean_factor:
            mean_Factors = np.mean(self.Factors, axis=1).reshape((-1, 1))

        Qpred = np.full((L, T), np.nan)
        for t in range(T):
            if mean_factor:
                qpred = W[:, :, t].dot(self.Gamma).dot(mean_Factors)
                Qpred[:, t] = np.squeeze(qpred)
            else:
                Qpred[:, t] = W[:, :, t].dot(self.Gamma)\
                                        .dot(self.Factors[:, t])

        return Qpred
    def score(self, X, y=None, indices=None, mean_factor=False,
              data_type="panel"):
        """generate R^2

        Parameters
        ----------
        X : numpy array or pandas DataFrame
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.

            If given as a DataFrame, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        y : numpy array or pandas Series, optional
            dependent variable where indices correspond to those in X

            If given as a Series, we assume that it contains a MultiIndex
            mapping to each entity-time pair
        indices : numpy array, optional
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)

            If None, we use the values associated with the current model.
        mean_factor : boolean
            If true, the estimated factors are averaged in the time-series
            before prediction.
        data_type : str
            label for data-type used for prediction, either "panel" or
            "portfolio"; see predict for details

        Returns
        -------
        r2 : scalar
            summary of model performance
        """

        if data_type == "panel":
            X, y, indices, metad = _prep_input(X, y, indices)
            if y is None:
                y = self.y
            yhat = self.predict(X=X, indices=indices,
                                mean_factor=mean_factor,
                                data_type="panel")
            return r2_score(y, yhat)

        elif data_type == "portfolio":
            X, y, indices, metad = _prep_input(X, y, indices)
            if y is None:
                y = self.y
            Q, W, val_obs = _build_portfolio(X, y, indices, metad)
            Qhat = self.predict(W=W, mean_factor=mean_factor,
                                data_type="portfolio")
            return r2_score(Q, Qhat)

        else:
            raise ValueError("Unsupported data_type: %s" % data_type)
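
    # Illustrative sketch (not part of the original module): panel and
    # portfolio R^2 for a fitted model, with X and y as in _example_fit.
    @staticmethod
    def _example_score(mod, X, y):
        r2_panel = mod.score(X, y, data_type="panel")
        r2_port = mod.score(X, y, data_type="portfolio")
        return r2_panel, r2_port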
    def BS_Walpha(self, ndraws=1000, n_jobs=1, backend='loky'):
        """Bootstrap inference on the hypothesis Gamma_alpha = 0

        Parameters
        ----------
        ndraws : integer, default=1000
            Number of bootstrap draws and re-estimations to be performed
        backend : optional
            Value is either 'loky' or 'multiprocessing'
        n_jobs : integer
            Number of workers to be used. If -1, all available workers are
            used.

        Returns
        -------
        pval : float
            P-value from the hypothesis test H0: Gamma_alpha = 0
        """

        if self.alpha > 0.:
            raise ValueError("Bootstrap is currently not supported for "
                             "regularized estimation.")
        if not self.intercept:
            raise ValueError('Need to fit model with intercept first.')
        # fail if model isn't estimated
        if not hasattr(self, "Q"):
            raise ValueError("Bootstrap can only be run on a fitted model.")

        N, L, T = self.metad["N"], self.metad["L"], self.metad["T"]

        # Compute Walpha
        Walpha = self.Gamma[:, -1].T.dot(self.Gamma[:, -1])

        # Compute residuals
        d = np.full((L, T), np.nan)
        for t_i, t in enumerate(self.metad["dates"]):
            d[:, t_i] = self.Q[:, t_i] - self.W[:, :, t_i].dot(self.Gamma)\
                .dot(self.Factors[:, t_i])

        print("Starting Bootstrap...")
        Walpha_b = Parallel(n_jobs=n_jobs, backend=backend, verbose=10)(
            delayed(_BS_Walpha_sub)(self, n, d) for n in range(ndraws))
        print("Done!")

        pval = np.sum(Walpha_b > Walpha) / ndraws
        return pval
    def BS_Wbeta(self, l, ndraws=1000, n_jobs=1, backend='loky'):
        """Test of instrument significance.

        Bootstrap inference on the hypothesis that the l-th row of
        Gamma_beta is zero.

        Parameters
        ----------
        l : integer
            Position of the characteristic for which the bootstrap is to be
            carried out. For example, if there are 10 characteristics, l is
            in the range 0 to 9 (left-/right-inclusive).
        ndraws : integer, default=1000
            Number of bootstrap draws and re-estimations to be performed
        n_jobs : integer
            Number of workers to be used for multiprocessing. If -1, all
            available workers are used.
        backend : optional
            Value is either 'loky' or 'multiprocessing'

        Returns
        -------
        pval : float
            P-value from the hypothesis test H0: Gamma_beta,l = 0
        """

        if self.alpha > 0.:
            raise ValueError("Bootstrap is currently not supported for "
                             "regularized estimation.")
        if self.PSFcase:
            raise ValueError('Need to fit model without intercept first.')
        # fail if model isn't estimated
        if not hasattr(self, "Q"):
            raise ValueError("Bootstrap can only be run on a fitted model.")

        N, L, T = self.metad["N"], self.metad["L"], self.metad["T"]

        # Compute Wbeta_l if the l-th characteristic is set to zero
        Wbeta_l = np.sum(np.square(self.Gamma[l, :]))

        # Compute residuals
        d = np.full((L, T), np.nan)
        for t_i, t in enumerate(self.metad["dates"]):
            d[:, t_i] = self.Q[:, t_i] - self.W[:, :, t_i].dot(self.Gamma)\
                .dot(self.Factors[:, t_i])

        print("Starting Bootstrap...")
        Wbeta_l_b = Parallel(n_jobs=n_jobs, backend=backend, verbose=10)(
            delayed(_BS_Wbeta_sub)(self, n, d, l) for n in range(ndraws))
        print("Done!")

        pval = np.sum(Wbeta_l_b > Wbeta_l) / ndraws
        return pval
    def BS_Wdelta(self, ndraws=1000, n_jobs=1, backend='loky'):
        """Test of PSF significance.

        Bootstrap inference on the hypothesis Gamma_delta = 0.  Assumes
        that only one PSF is used and no intercept is in use.

        Parameters
        ----------
        ndraws : integer, default=1000
            Number of bootstrap draws and re-estimations to be performed
        n_jobs : integer
            Number of workers to be used for multiprocessing. If -1, all
            available workers are used.
        backend : optional
            Value is either 'loky' or 'multiprocessing'

        Returns
        -------
        pval : float
            P-value from the hypothesis test H0: Gamma_delta = 0
        """

        if self.alpha > 0.:
            raise ValueError("Bootstrap is currently not supported for "
                             "regularized estimation.")
        if self.intercept:
            raise ValueError('Need to fit model without intercept first.')
        if not self.PSFcase:
            raise ValueError('Need to fit model with one PSF first.')
        K_PSF, _ = np.shape(self.PSF)
        if K_PSF > 1:
            raise ValueError('Not implemented for more than one PSF yet.')
        # fail if model isn't estimated
        if not hasattr(self, "Q"):
            raise ValueError("Bootstrap can only be run on a fitted model.")

        N, L, T = self.metad["N"], self.metad["L"], self.metad["T"]

        # Compute Wdelta if Gamma_delta is set to zero
        Wdelta = self.Gamma[:, -1].T.dot(self.Gamma[:, -1])

        # Compute residuals
        d = np.full((L, T), np.nan)
        for t_i, t in enumerate(self.metad["dates"]):
            d[:, t_i] = self.Q[:, t_i] - self.W[:, :, t_i].dot(self.Gamma)\
                .dot(self.Factors[:, t_i])

        print("Starting Bootstrap...")
        Wdelta_b = Parallel(n_jobs=n_jobs, backend=backend, verbose=10)(
            delayed(_BS_Wdelta_sub)(self, n, d) for n in range(ndraws))
        print("Done!")

        pval = np.sum(Wdelta_b > Wdelta) / ndraws
        return pval
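
    # Illustrative sketch (not part of the original module): running the
    # bootstrap tests. BS_Walpha needs a fit with an intercept, while
    # BS_Wbeta needs one without; ndraws is kept small purely for speed.
    @staticmethod
    def _example_bootstrap(X, y):
        mod = InstrumentedPCA(n_factors=1, intercept=True).fit(X, y)
        pval_alpha = mod.BS_Walpha(ndraws=100, n_jobs=-1)
        mod = InstrumentedPCA(n_factors=1, intercept=False).fit(X, y)
        pval_beta0 = mod.BS_Wbeta(0, ndraws=100, n_jobs=-1)
        return pval_alpha, pval_beta0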
    def predictOOS(self, X=None, y=None, indices=None, mean_factor=False):
        """Predicts the time t+1 observation using an out-of-sample design.

        Parameters
        ----------
        X : numpy array or pandas DataFrame
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices. All data must correspond to time
            t, i.e. all observations must occur on the same date. If an
            observation contains missing data, NaN will be returned. Note
            that the number of characteristics (L) passed has to match the
            number of characteristics used when fitting the regressor.
        y : numpy array or pandas Series
            dependent variable where indices correspond to those in X
        indices : numpy array
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)
        mean_factor : boolean
            If true, the estimated factors are averaged in the time-series
            before prediction.

        Returns
        -------
        ypred : numpy array
            The length of the returned array matches the length of data.
            A nan will be returned if there is missing characteristics
            information.
        """

        X, y, indices, metad = _prep_input(X, y, indices)
        N, L, T = metad["N"], metad["L"], metad["T"]

        ypred = np.full((N), np.nan)

        # Compute realized factor returns
        Numer = self.Gamma.T.dot(X.T).dot(y)
        Denom = self.Gamma.T.dot(X.T).dot(X).dot(self.Gamma)
        Factor_OOS = np.linalg.solve(Denom, Numer.reshape((-1, 1)))

        if mean_factor:
            ypred = np.squeeze(X.dot(self.Gamma)
                               .dot(np.mean(self.Factors, axis=1)
                                    .reshape((-1, 1))))
        else:
            # Factor_OOS is a column vector, so the fitted values are the
            # rows of X Gamma Factor_OOS (np.squeeze, not np.diag)
            ypred = np.squeeze(X.dot(self.Gamma).dot(Factor_OOS))

        return ypred
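
    # Illustrative sketch (not part of the original module): one-step-ahead
    # prediction from characteristics and realizations observed on a single
    # new date. X_t and y_t are assumed to carry a MultiIndex whose date
    # level is constant across rows.
    @staticmethod
    def _example_predict_oos(mod, X_t, y_t):
        return mod.predictOOS(X=X_t, y=y_t, mean_factor=True)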
    def _fit_ipca(self, X=None, y=None, indices=None, PSF=None, Q=None,
                  W=None, val_obs=None, Gamma=None, Factors=None,
                  quiet=False, data_type="portfolio", **kwargs):
        """Fits the regressor to the data using alternating least squares

        Parameters
        ----------
        X : numpy array, optional
            matrix of characteristics where each row corresponds to an
            entity-time pair in indices.  The number of characteristics
            (columns here) used as instruments is L.
        y : numpy array, optional
            dependent variable where indices correspond to those in X
        indices : numpy array, optional
            array containing the panel indices.  Should consist of two
            columns:

            - Column 1: entity id (i)
            - Column 2: time index (t)
        PSF : array-like of shape (M, T), optional
            pre-specified factors
        Q : array-like of shape (L, T), optional
            characteristics-weighted portfolios
        W : array-like of shape (L, L, T), optional
            portfolio weights
        val_obs : array-like, optional
            matrix of dimension (T), containing the number of non-missing
            observations at each point in time
        Gamma : numpy array, optional
            If provided, starting values for Gamma
        Factors : numpy array, optional
            If provided, starting values for Factors
        data_type : str
            label for method used when fitting ALS, one of:

            1. panel
            2. portfolio
        quiet : bool, optional
            If true, no text output will be produced

        Returns
        -------
        Gamma : array-like
            with dimensions (L, n_factors). If there are M many
            pre-specified factors in the model then the matrix returned is
            of dimension (L, (n_factors+M)). If an intercept is included
            in the model, its loadings are returned in the last column of
            Gamma.
        Factors : array-like
            with dimensions (n_factors, T). If pre-specified factors were
            passed the returned matrix is of dimension
            ((n_factors - M), T), corresponding to the n_factors - M many
            factors estimated on top of the pre-specified ones.
        """

        if data_type == "panel":
            ALS_inputs = (X, y, indices)
            ALS_fit = self._ALS_fit_panel
        elif data_type == "portfolio":
            ALS_inputs = (Q, W, val_obs)
            ALS_fit = self._ALS_fit_portfolio
        else:
            raise ValueError("Unsupported ALS method: %s" % data_type)

        # Initialize the alternating least squares procedure from the SVD
        # of the portfolio matrix unless starting values were provided
        if Gamma is None or Factors is None:
            Gamma_Old, s, v = np.linalg.svd(Q)
            Gamma_Old = Gamma_Old[:, :self.n_factors_eff]
            s = s[:self.n_factors_eff]
            v = v[:self.n_factors_eff, :]
            Factor_Old = np.diag(s).dot(v)
        if Gamma is not None:
            Gamma_Old = Gamma
        if Factors is not None:
            Factor_Old = Factors

        # Estimation step
        tol_current = 1
        n_iter = 0
        while n_iter <= self.max_iter and tol_current > self.iter_tol:

            Gamma_New, Factor_New = ALS_fit(Gamma_Old, *ALS_inputs,
                                            PSF=PSF, **kwargs)
            if self.PSFcase:
                tol_current = np.max(np.abs(Gamma_New - Gamma_Old))
            else:
                tol_current_G = np.max(np.abs(Gamma_New - Gamma_Old))
                tol_current_F = np.max(np.abs(Factor_New - Factor_Old))
                tol_current = max(tol_current_G, tol_current_F)

            # Update factors and loadings
            Factor_Old, Gamma_Old = Factor_New, Gamma_New

            n_iter += 1
            if not quiet:
                print('Step', n_iter, '- Aggregate Update:', tol_current)

        if not quiet:
            print('-- Convergence Reached --')

        return Gamma_New, Factor_New

    def _ALS_fit_portfolio(self, Gamma_Old, Q, W, val_obs, PSF=None,
                           **kwargs):
        """Alternating least squares procedure to fit params

        Runs using portfolio data as input

        The alternating least squares procedure switches back and forth
        between evaluating the first order conditions for Gamma_Beta and
        the factors until convergence is reached.  This function carries
        out one complete update procedure and will need to be called
        repeatedly, using the updated Gammas and factors as inputs.
        """

        T = self.metad["T"]

        if PSF is None:
            L, K = np.shape(Gamma_Old)
            Ktilde = K
        else:
            L, Ktilde = np.shape(Gamma_Old)
            K_PSF, _ = np.shape(PSF)
            K = Ktilde - K_PSF

        # ALS Step 1
        if K > 0:

            # case with no observed factors
            if PSF is None:
                if self.n_jobs > 1:
                    F_New = Parallel(n_jobs=self.n_jobs,
                                     backend=self.backend)(
                        delayed(_Ft_fit_portfolio)(
                            Gamma_Old, W[:, :, t], Q[:, t])
                        for t in range(T))
                    F_New = np.stack(F_New, axis=1)
                else:
                    F_New = np.full((K, T), np.nan)
                    for t in range(T):
                        F_New[:, t] = _Ft_fit_portfolio(Gamma_Old,
                                                        W[:, :, t],
                                                        Q[:, t])

            # observed + latent factors case
            else:
                if self.n_jobs > 1:
                    F_New = Parallel(n_jobs=self.n_jobs,
                                     backend=self.backend)(
                        delayed(_Ft_fit_PSF_portfolio)(
                            Gamma_Old, W[:, :, t], Q[:, t], PSF[:, t],
                            K, Ktilde)
                        for t in range(T))
                    F_New = np.stack(F_New, axis=1)
                else:
                    F_New = np.full((K, T), np.nan)
                    for t in range(T):
                        F_New[:, t] = _Ft_fit_PSF_portfolio(Gamma_Old,
                                                            W[:, :, t],
                                                            Q[:, t],
                                                            PSF[:, t],
                                                            K, Ktilde)

        else:
            F_New = None

        # ALS Step 2
        Gamma_New = _Gamma_fit_portfolio(F_New, Q, W, val_obs, PSF, L, K,
                                         Ktilde, T)

        # condition checks
        if K > 0:
            # Enforce orthogonality of Gamma_alpha and Gamma_beta
            R1 = _numba_chol(Gamma_New[:, :K].T.dot(Gamma_New[:, :K])).T
            R2, _, _ = _numba_svd(R1.dot(F_New).dot(F_New.T).dot(R1.T))
            Gamma_New[:, :K] = _numba_lstsq(Gamma_New[:, :K].T,
                                            R1.T)[0].dot(R2)
            F_New = _numba_solve(R2, R1.dot(F_New))

            # Enforce sign convention for Gamma_Beta and F_New
            sg = np.sign(np.mean(F_New, axis=1)).reshape((-1, 1))
            sg[sg == 0] = 1
            Gamma_New[:, :K] = np.multiply(Gamma_New[:, :K], sg.T)
            F_New = np.multiply(F_New, sg)

            if PSF is not None:
                Gamma_New[:, K:] = (np.identity(Gamma_New.shape[0])
                                    - Gamma_New[:, :K]
                                    .dot(Gamma_New[:, :K].T))\
                    .dot(Gamma_New[:, K:])
                F_New += Gamma_New[:, :K].T.dot(Gamma_New[:, K:]).dot(PSF)
                sg = np.sign(np.mean(F_New, axis=1)).reshape((-1, 1))
                sg[sg == 0] = 1
                Gamma_New[:, :K] = np.multiply(Gamma_New[:, :K], sg.T)
                F_New = np.multiply(F_New, sg)

        return Gamma_New, F_New

    def _ALS_fit_panel(self, Gamma_Old, X, y, indices, PSF=None, **kwargs):
        """Alternating least squares procedure to fit params

        Runs using panel data as input

        The alternating least squares procedure switches back and forth
        between evaluating the first order conditions for Gamma_Beta and
        the factors until convergence is reached.  This function carries
        out one complete update procedure and will need to be called
        repeatedly, using the updated Gammas and factors as inputs.
        """

        T, dates = self.metad["T"], self.metad["dates"]

        if PSF is None:
            L, K = np.shape(Gamma_Old)
            Ktilde = K
        else:
            L, Ktilde = np.shape(Gamma_Old)
            K_PSF, _ = np.shape(PSF)
            K = Ktilde - K_PSF

        # prep T-ind for iteration
        Tind = [np.where(indices[:, 1] == t)[0] for t in range(T)]

        # ALS Step 1
        if K > 0:

            # case with no observed factors
            if PSF is None:
                if self.n_jobs > 1:
                    F_New = Parallel(n_jobs=self.n_jobs,
                                     backend=self.backend)(
                        delayed(_Ft_fit_panel)(
                            Gamma_Old, X[tind, :], y[tind])
                        for t, tind in enumerate(Tind))
                    F_New = np.stack(F_New, axis=1)
                else:
                    F_New = np.full((K, T), np.nan)
                    for t, tind in enumerate(Tind):
                        F_New[:, t] = _Ft_fit_panel(Gamma_Old, X[tind, :],
                                                    y[tind])

            # observed + latent factors case
            else:
                if self.n_jobs > 1:
                    F_New = Parallel(n_jobs=self.n_jobs,
                                     backend=self.backend)(
                        delayed(_Ft_fit_PSF_panel)(
                            Gamma_Old, X[tind, :], y[tind], PSF[:, t],
                            K, Ktilde)
                        for t, tind in enumerate(Tind))
                    F_New = np.stack(F_New, axis=1)
                else:
                    F_New = np.full((K, T), np.nan)
                    for t, tind in enumerate(Tind):
                        F_New[:, t] = _Ft_fit_PSF_panel(Gamma_Old,
                                                        X[tind, :],
                                                        y[tind],
                                                        PSF[:, t],
                                                        K, Ktilde)

        else:
            F_New = None

        # ALS Step 2
        Gamma_New = _Gamma_fit_panel(F_New, X, y, indices, PSF, L, Ktilde,
                                     self.alpha, self.l1_ratio, **kwargs)

        # condition checks
        if K > 0:
            # Enforce orthogonality of Gamma_alpha and Gamma_beta
            R1 = _numba_chol(Gamma_New[:, :K].T.dot(Gamma_New[:, :K])).T
            R2, _, _ = _numba_svd(R1.dot(F_New).dot(F_New.T).dot(R1.T))
            Gamma_New[:, :K] = _numba_lstsq(Gamma_New[:, :K].T,
                                            R1.T)[0].dot(R2)
            F_New = _numba_solve(R2, R1.dot(F_New))

            # Enforce sign convention for Gamma_Beta and F_New
            sg = np.sign(np.mean(F_New, axis=1)).reshape((-1, 1))
            sg[sg == 0] = 1
            Gamma_New[:, :K] = np.multiply(Gamma_New[:, :K], sg.T)
            F_New = np.multiply(F_New, sg)

            if PSF is not None:
                Gamma_New[:, K:] = (np.identity(Gamma_New.shape[0])
                                    - Gamma_New[:, :K]
                                    .dot(Gamma_New[:, :K].T))\
                    .dot(Gamma_New[:, K:])
                F_New += Gamma_New[:, :K].T.dot(Gamma_New[:, K:]).dot(PSF)
                sg = np.sign(np.mean(F_New, axis=1)).reshape((-1, 1))
                sg[sg == 0] = 1
                Gamma_New[:, :K] = np.multiply(Gamma_New[:, :K], sg.T)
                F_New = np.multiply(F_New, sg)

        return Gamma_New, F_New
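
# Illustrative sketch (not part of the original module): the identification
# step used in the "condition checks" above, applied to random inputs. It
# rotates (Gamma_beta, F) so that Gamma_beta is orthonormal and F F' is
# diagonal while leaving the fitted product Gamma_beta @ F unchanged.
def _example_identification(K=2, L=5, T=30, seed=0):
    rng = np.random.default_rng(seed)
    G = rng.standard_normal((L, K))
    F = rng.standard_normal((K, T))
    fit0 = G.dot(F)
    R1 = np.linalg.cholesky(G.T.dot(G)).T
    R2, _, _ = np.linalg.svd(R1.dot(F).dot(F.T).dot(R1.T))
    G_rot = np.linalg.lstsq(G.T, R1.T, rcond=None)[0].dot(R2)
    F_rot = np.linalg.solve(R2, R1.dot(F))
    # the rotation is observationally equivalent
    assert np.allclose(G_rot.dot(F_rot), fit0)
    return G_rot, F_rot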
def _prep_input(X, y=None, indices=None):
    """handle mapping from different input types to consistent internal data

    Parameters
    ----------
    X : numpy array or pandas DataFrame
        matrix of characteristics where each row corresponds to an
        entity-time pair in indices.  The number of characteristics
        (columns here) used as instruments is L.

        If given as a DataFrame, we assume that it contains a MultiIndex
        mapping to each entity-time pair
    y : numpy array or pandas Series, optional
        dependent variable where indices correspond to those in X

        If given as a Series, we assume that it contains a MultiIndex
        mapping to each entity-time pair
    indices : numpy array or pandas MultiIndex, optional
        array containing the panel indices.  Should consist of two columns:

        - Column 1: entity id (i)
        - Column 2: time index (t)

        The panel may be unbalanced. The number of unique entities is
        n_samples, the number of unique dates is T, and the number of
        characteristics used as instruments is L.

    Returns
    -------
    X : numpy array
        matrix of characteristics where each row corresponds to an
        entity-time pair in indices
    y : numpy array
        dependent variable where indices correspond to those in X
    indices : numpy array
        array containing the panel indices
    metad : dict
        contains metadata on inputs:

        dates : array-like
            unique dates in panel
        ids : array-like
            unique ids in panel
        chars : array-like
            labels for X chars/columns
        T : scalar
            number of time periods
        N : scalar
            number of ids
        L : scalar
            total number of characteristics
    """

    # Check panel input
    if X is None:
        raise ValueError('Must pass panel input data.')
    else:
        # remove panel rows containing missing obs
        non_nan_ind = ~np.any(np.isnan(X), axis=1)
        X = X[non_nan_ind]
        if y is not None:
            y = y[non_nan_ind]

    # if data-frames passed, break out indices from data
    if isinstance(X, pd.DataFrame) and not isinstance(y, pd.Series):
        indices = X.index
        chars = X.columns
        X = X.values
    elif not isinstance(X, pd.DataFrame) and isinstance(y, pd.Series):
        indices = y.index
        y = y.values
        chars = np.arange(X.shape[1])
    elif isinstance(X, pd.DataFrame) and isinstance(y, pd.Series):
        Xind = X.index
        chars = X.columns
        yind = y.index
        X = X.values
        y = y.values
        if not np.array_equal(Xind, yind):
            raise ValueError("If indices are provided with both X and y "
                             "they must be the same")
        indices = Xind
    else:
        chars = np.arange(X.shape[1])
        if indices is None:
            raise ValueError("entity-time indices must be provided either "
                             "separately or as a MultiIndex with X/y")

    # extract numpy array and labels from multiindex
    if isinstance(indices, pd.MultiIndex):
        indices = indices.to_frame().values
    ids = np.unique(indices[:, 0])
    dates = np.unique(indices[:, 1])
    indices[:, 0] = np.unique(indices[:, 0], return_inverse=True)[1]
    indices[:, 1] = np.unique(indices[:, 1], return_inverse=True)[1]
    # both columns are integer codes after the mapping above, so cast to
    # int to keep downstream fancy indexing safe for object-dtype inputs
    indices = indices.astype(int)

    # init data dimensions
    T = np.size(dates, axis=0)
    N = np.size(ids, axis=0)
    L = np.size(chars, axis=0)

    # prep metadata
    metad = {"dates": dates, "ids": ids, "chars": chars,
             "T": T, "N": N, "L": L}

    return X, y, indices, metad
def _build_portfolio(X, y, indices, metad):
    """Converts a stacked panel of data, where each row corresponds to an
    observation (i, t), into portfolio form, where L is the number of
    characteristics and T is the number of unique dates

    Parameters
    ----------
    X : numpy array
        matrix of characteristics where each row corresponds to an
        entity-time pair in indices.  The number of characteristics
        (columns here) used as instruments is L.
    y : numpy array, optional
        dependent variable where indices correspond to those in X

        If None, we just build the portfolio info for the independent vars
    indices : numpy array
        array containing the panel indices.  Should consist of two columns:

        - Column 1: entity id (i)
        - Column 2: time index (t)

        The panel may be unbalanced.
    metad : dict
        dictionary containing metadata

    Returns
    -------
    Q : array-like, optional
        matrix of dimensions (L, T), containing the characteristics-
        weighted portfolios
    W : array-like, optional
        matrix of dimension (L, L, T)
    val_obs : array-like
        matrix of dimension (T), containing the number of non-missing
        observations at each point in time
    """

    N, L, T = metad["N"], metad["L"], metad["T"]

    print('The panel dimensions are:')
    print('n_samples:', N, ', L:', L, ', T:', T)

    bar = progressbar.ProgressBar(maxval=T,
                                  widgets=[progressbar.Bar('=', '[', ']'),
                                           ' ', progressbar.Percentage()])
    bar.start()

    # init portfolio outputs based on input type
    if y is not None:
        Q = np.full((L, T), np.nan)
    else:
        Q = None
    W = np.full((L, L, T), np.nan)
    val_obs = np.full((T), np.nan)

    if y is None:
        for t in range(T):
            ixt = (indices[:, 1] == t)
            val_obs[t] = np.sum(ixt)
            W[:, :, t] = X[ixt, :].T.dot(X[ixt, :]) / val_obs[t]
            bar.update(t)
    else:
        for t in range(T):
            ixt = (indices[:, 1] == t)
            val_obs[t] = np.sum(ixt)
            Q[:, t] = X[ixt, :].T.dot(y[ixt]) / val_obs[t]
            W[:, :, t] = X[ixt, :].T.dot(X[ixt, :]) / val_obs[t]
            bar.update(t)
    bar.finish()

    # return portfolio data
    return Q, W, val_obs


def _Ft_fit_portfolio(Gamma_Old, W_t, Q_t):
    """helper func to parallelize the F ALS fit"""
    m1 = Gamma_Old.T.dot(W_t).dot(Gamma_Old)
    m2 = Gamma_Old.T.dot(Q_t)
    return np.squeeze(_numba_solve(m1, m2.reshape((-1, 1))))


def _Ft_fit_PSF_portfolio(Gamma_Old, W_t, Q_t, PSF_t, K, Ktilde):
    """helper func to parallelize the F ALS fit with observed factors"""
    m1 = Gamma_Old[:, :K].T.dot(W_t).dot(Gamma_Old[:, :K])
    m2 = Gamma_Old[:, :K].T.dot(Q_t)
    m2 -= Gamma_Old[:, :K].T.dot(W_t).dot(Gamma_Old[:, K:Ktilde])\
        .dot(PSF_t)
    return np.squeeze(_numba_solve(m1, m2.reshape((-1, 1))))


def _Ft_fit_panel(Gamma_Old, X_t, y_t):
    """fits F_t using panel data"""
    exog_t = X_t.dot(Gamma_Old)
    Ft = _numba_lstsq(exog_t, y_t)[0]
    return Ft


def _Ft_fit_PSF_panel(Gamma_Old, X_t, y_t, PSF_t, K, Ktilde):
    """fits F_t using panel data with PSF"""
    exog_t = X_t.dot(Gamma_Old)
    y_t_resid = y_t - exog_t[:, K:Ktilde].dot(PSF_t)
    Ft = _numba_lstsq(exog_t[:, :K], y_t_resid)[0]
    return Ft
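
# Illustrative sketch (not part of the original module): the managed
# portfolios built above, written out for a single date. Q[:, t] is the
# characteristic-weighted average of y and W[:, :, t] the characteristics'
# second-moment matrix; the sizes below are arbitrary assumptions.
def _example_portfolio_step(seed=0):
    rng = np.random.default_rng(seed)
    X_t = rng.standard_normal((10, 3))    # 10 entities, 3 characteristics
    y_t = rng.standard_normal(10)
    q_t = X_t.T.dot(y_t) / X_t.shape[0]   # one column of Q
    w_t = X_t.T.dot(X_t) / X_t.shape[0]   # one slice of W
    return q_t, w_t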
def _Gamma_fit_portfolio(F_New, Q, W, val_obs, PSF, L, K, Ktilde, T):
    """helper function for fitting Gamma without the panel"""
    Numer = _numba_full((L * Ktilde, 1), 0.0)
    Denom = _numba_full((L * Ktilde, L * Ktilde), 0.0)

    # no observed factors
    if PSF is None:
        for t in range(T):
            Numer += _numba_kron(Q[:, t].reshape((-1, 1)),
                                 F_New[:, t].reshape((-1, 1))) \
                * val_obs[t]
            Denom += _numba_kron(W[:, :, t],
                                 F_New[:, t].reshape((-1, 1))
                                 .dot(F_New[:, t].reshape((1, -1)))) \
                * val_obs[t]

    # observed + latent factors
    elif K > 0:
        for t in range(T):
            Numer += _numba_kron(Q[:, t].reshape((-1, 1)),
                                 np.vstack((F_New[:, t].reshape((-1, 1)),
                                            PSF[:, t].reshape((-1, 1))))) \
                * val_obs[t]
            Denom_temp = np.vstack((F_New[:, t].reshape((-1, 1)),
                                    PSF[:, t].reshape((-1, 1))))
            Denom += _numba_kron(W[:, :, t],
                                 Denom_temp.dot(Denom_temp.T)
                                 * val_obs[t])

    # only observed factors
    else:
        for t in range(T):
            Numer += _numba_kron(Q[:, t].reshape((-1, 1)),
                                 PSF[:, t].reshape((-1, 1))) * val_obs[t]
            Denom += _numba_kron(W[:, :, t],
                                 PSF[:, t].reshape((-1, 1))
                                 .dot(PSF[:, t].reshape((-1, 1)).T)) \
                * val_obs[t]

    Gamma_New = _numba_solve(Denom, Numer).reshape((L, Ktilde))

    return Gamma_New


def _Gamma_fit_panel(F_New, X, y, indices, PSF, L, Ktilde, alpha, l1_ratio,
                     **kwargs):
    """helper function for estimating vectorized Gamma with the panel"""

    # join observed factors with latent factors and map to panel
    if PSF is None:
        F = F_New
    elif F_New is None:
        F = PSF
    else:
        F = np.vstack((F_New, PSF))
    F = F[:, indices[:, 1]]

    # interact factors and characteristics
    ZkF = np.hstack([F[k, :, None] * X for k in range(Ktilde)])

    # elastic net fit
    if alpha:
        mod = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, **kwargs)
        mod.fit(ZkF, y)
        gamma = mod.coef_
    # OLS fit
    else:
        gamma = _numba_lstsq(ZkF, y)[0]
    gamma = gamma.reshape((Ktilde, L)).T

    return gamma


def _fit_cv(model, X, y, indices, PSF, n_splits, split_method, alpha,
            **kwargs):
    """inner function for fit_path doing CV

    Parameters
    ----------
    model : IPCA model instance
        contains the params
    X : numpy array
        matrix of characteristics where each row corresponds to an
        entity-time pair in indices.  The number of characteristics
        (columns here) used as instruments is L.
    y : numpy array
        dependent variable where indices correspond to those in X
    indices : numpy array
        array containing the panel indices.  Should consist of two columns:

        - Column 1: entity id (i)
        - Column 2: time index (t)

        The panel may be unbalanced. The number of unique entities is
        n_samples, the number of unique dates is T, and the number of
        characteristics used as instruments is L.
    PSF : numpy array, optional
        Set of pre-specified factors as matrix of dimension (M, T)
    n_splits : scalar
        number of CV partitions
    split_method : sklearn cross-validation generator
        factory method to generate CV partitions
    alpha : scalar
        regularizing constant for the current step in the elastic-net path

    Returns
    -------
    mse : array
        array of MSEs for each CV partition

    Notes
    -----
    Groups are defined by 'ids'
    """

    # build iterator
    mse_l = []
    split = split_method(n_splits=n_splits)
    full_tind = np.unique(indices[:, 1])
    for train, test in split.split(indices, groups=indices[:, 0]):

        # build partitioned model
        train_X = X[train, :]
        train_y = y[train]
        train_indices = indices[train, :]
        test_X = X[test, :]
        test_y = y[test]
        test_indices = indices[test, :]
        if PSF is None:
            train_PSF = None
        else:
            train_tind = np.unique(train_indices[:, 1])
            train_tind = np.where(np.isin(full_tind, train_tind))[0]
            train_PSF = PSF[:, train_tind]

        # init new training model
        params = model.get_params()
        params["alpha"] = alpha
        train_IPCA = InstrumentedPCA(**params)
        train_IPCA = train_IPCA.fit(train_X, train_y, train_indices,
                                    train_PSF, **kwargs)

        # get MSE
        test_pred = train_IPCA.predict(test_X, test_indices,
                                       mean_factor=True)
        mse = np.sum(np.square(test_y - test_pred))
        mse /= test_pred.shape[0]
        mse_l.append(mse)

    return np.array(mse_l)


def _BS_Walpha_sub(model, n, d):
    L, T = model.metad["L"], model.metad["T"]
    Q_b = np.full((L, T), np.nan)
    np.random.seed(n)

    # Re-estimate unrestricted model
    Gamma = None
    while Gamma is None:
        try:
            for t in range(T):
                d_temp = np.random.standard_t(5)
                d_temp *= d[:, np.random.randint(0, high=T)]
                Q_b[:, t] = model.W[:, :, t].dot(model.Gamma[:, :-1])\
                    .dot(model.Factors[:-1, t]) + d_temp

            Gamma, Factors = model._fit_ipca(Q=Q_b, W=model.W,
                                             val_obs=model.val_obs,
                                             PSF=model.PSF, quiet=True,
                                             data_type="portfolio")
        except np.linalg.LinAlgError:
            warnings.warn("Encountered singularity in bootstrap "
                          "iteration. Observation discarded.")

    # Compute and store Walpha_b
    Walpha_b = Gamma[:, -1].T.dot(Gamma[:, -1])

    return Walpha_b


def _BS_Wbeta_sub(model, n, d, l):
    L, T = model.metad["L"], model.metad["T"]
    Q_b = np.full((L, T), np.nan)
    np.random.seed(n)

    # Modify Gamma_beta such that its l-th row is zero
    Gamma_beta_l = np.copy(model.Gamma)
    Gamma_beta_l[l, :] = 0

    Gamma = None
    while Gamma is None:
        try:
            for t in range(T):
                d_temp = np.random.standard_t(5)
                d_temp *= d[:, np.random.randint(0, high=T)]
                Q_b[:, t] = model.W[:, :, t].dot(Gamma_beta_l)\
                    .dot(model.Factors[:, t]) + d_temp

            Gamma, Factors = model._fit_ipca(Q=Q_b, W=model.W,
                                             val_obs=model.val_obs,
                                             PSF=model.PSF, quiet=True,
                                             data_type="portfolio")
        except np.linalg.LinAlgError:
            warnings.warn("Encountered singularity in bootstrap "
                          "iteration. Observation discarded.")

    # Compute and store Wbeta_l_b; the dot product of the l-th row with
    # itself is already the scalar sum of squared loadings
    Wbeta_l_b = Gamma[l, :].dot(Gamma[l, :])

    return Wbeta_l_b


def _BS_Wdelta_sub(model, n, d):
    L, T = model.metad["L"], model.metad["T"]
    Q_b = np.full((L, T), np.nan)
    np.random.seed(n)

    # Modify Gamma_delta such that its last column for the PSF is zero
    Gamma_delta = np.copy(model.Gamma)
    Gamma_delta[:, -1] = 0

    Gamma = None
    while Gamma is None:
        try:
            for t in range(T):
                d_temp = np.random.standard_t(5)
                d_temp *= d[:, np.random.randint(0, high=T)]
                Q_b[:, t] = model.W[:, :, t].dot(Gamma_delta)\
                    .dot(model.Factors[:, t]) + d_temp

            Gamma, Factors = model._fit_ipca(Q=Q_b, W=model.W,
                                             val_obs=model.val_obs,
                                             PSF=model.PSF, quiet=True,
                                             data_type="portfolio")
        except np.linalg.LinAlgError:
            warnings.warn("Encountered singularity in bootstrap "
                          "iteration. Observation discarded.")

    # Compute and store Wdelta_b
    Wdelta_b = Gamma[:, -1].T.dot(Gamma[:, -1])

    return Wdelta_b


@jit(nopython=True)
def _numba_solve(m1, m2):
    return np.linalg.solve(np.ascontiguousarray(m1),
                           np.ascontiguousarray(m2))

@jit(nopython=True)
def _numba_lstsq(m1, m2):
    return np.linalg.lstsq(np.ascontiguousarray(m1),
                           np.ascontiguousarray(m2))

@jit(nopython=True)
def _numba_kron(m1, m2):
    return np.kron(np.ascontiguousarray(m1), np.ascontiguousarray(m2))

@jit(nopython=True)
def _numba_chol(m1):
    return np.linalg.cholesky(np.ascontiguousarray(m1))

@jit(nopython=True)
def _numba_svd(m1):
    return np.linalg.svd(np.ascontiguousarray(m1))

@jit(nopython=True)
def _numba_full(m1, m2):
    return np.full(m1, m2)
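
# Illustrative sketch (not part of the original module): the Kronecker
# identity behind _Gamma_fit_portfolio. With row-major (C-order)
# vectorization, vec(A X B') = kron(A, B) vec(X), which is why the stacked
# normal equations for Gamma can be accumulated with _numba_kron terms.
def _example_kron_identity(seed=0):
    rng = np.random.default_rng(seed)
    A = rng.standard_normal((4, 4))    # plays the role of W_t
    B = rng.standard_normal((2, 2))    # plays the role of F_t F_t'
    Xm = rng.standard_normal((4, 2))   # plays the role of Gamma
    lhs = np.kron(A, B).dot(Xm.ravel())
    rhs = A.dot(Xm).dot(B.T).ravel()
    assert np.allclose(lhs, rhs)
    return lhs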