Source code for utils


import numpy as np


[docs]
class PinballLoss():
    '''
    Pinball (a.k.a. Quantile) loss function

    Parameters:
    ----------------
        - theta: float
            the target confidence level
        - ret_mean: bool, optional
            if True, the function returns the mean of the loss, otherwise the loss point-by-point. Default is True

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import PinballLoss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts
        theta = 0.05 #Set the desired confidence level

        PinballLoss(theta)(qf, y) #Compute the pinball loss
    
    Methods:
    ----------------
    '''
    def __init__(self, theta, ret_mean=True):
        self.theta = theta
        self.ret_mean = ret_mean
    

[docs]
    def __call__(self, y_pred, y_true):
        '''
        Compute the pinball loss

        INPUTS:
            - y_pred: ndarray
                the predicted values
            - y_true: ndarray
                the true values

        OUTPUTS:
            - loss: float
                the loss function mean value, if ret_mean is True. Otherwise, the loss for each observation
        '''
        #Check consistency in the dimensions
        if len(y_pred.shape) == 1:
            y_pred = y_pred.reshape(-1,1)
        if len(y_true.shape) == 1:
            y_true = y_true.reshape(-1,1)
        if y_pred.shape != y_true.shape:
            raise ValueError(f'Dimensions of y_pred ({y_pred.shape}) and y_true ({y_true.shape}) do not match!!!')
        # Compute the pinball loss
        error = y_true - y_pred
        loss = np.where(error >= 0, self.theta * error, (self.theta - 1) * error)
        if self.ret_mean: #If true, return the mean of the loss
            loss = np.mean(loss)
        return loss




[docs]
class barrera_loss():
    '''
    Barrera loss function. Eq. (2.13) in:
    
    Barrera, D., Crépey, S., Gobet, E., Nguyen, H. D., & Saadeddine, B. (2022). Learning value-at-risk and expected shortfall. arXiv preprint arXiv:2209.06476.
    
    Parameters:
    ----------------
        - theta: float
            the target confidence level
        - ret_mean: bool, optional
            if True, the function returns the mean of the loss, otherwise the loss point-by-point. Default is True

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import barrera_loss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts
        ef = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts
        theta = 0.05 #Set the desired confidence level

        barrera_loss(theta)(qf, ef, y) #Compute the barrera loss
    
    Methods:
    ----------------
    '''
    def __init__(self, theta, ret_mean=True):
        self.theta = theta
        self.ret_mean = ret_mean
    

[docs]
    def __call__(self, v, e, y):
        '''
        INPUTS:
            - v: ndarray
                the quantile estimate
            - e: ndarray
                the expected shortfall estimate
            - y: ndarray
                the actual time series

        OUTPUTS:
            - loss: float
                the loss function mean value, if ret_mean is True. Otherwise, the loss for each observation
        '''
        v, e, y = v.flatten(), e.flatten(), y.flatten()
        r = e - v #Barrera loss is computed on the difference ES - VaR
        if self.ret_mean: #If true, return the mean of the loss
            loss = np.nanmean( (r - np.where(y<v, (y-v)/self.theta, 0))**2 )
        else: #Otherwise, return the loss for each observation
            loss = (r - np.where(y<v, (y-v)/self.theta, 0))**2
        return loss




[docs]
class patton_loss():
    '''
    Patton loss function. Eq. (6) in:

    Patton, A. J., Ziegel, J. F., & Chen, R. (2019). Dynamic semiparametric models for expected shortfall (and value-at-risk). Journal of econometrics, 211(2), 388-413.

    Parameters:
    ----------------
        - theta: float
            the target confidence level
        - ret_mean: bool, optional
            if True, the function returns the mean of the loss, otherwise the loss point-by-point. Default is True

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import patton_loss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts
        ef = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts
        theta = 0.05 #Set the desired confidence level

        losses = patton_loss(theta, ret_mean=False)(qf, ef, y) #Compute the patton loss
    
    Methods:
    ----------------
    '''
    def __init__(self, theta, ret_mean=True):
        self.theta = theta
        self.ret_mean = ret_mean
    

[docs]
    def __call__(self, v, e, y):
        '''
        INPUTS:
            - v: ndarray
                the quantile estimate
            - e: ndarray
                the expected shortfall estimate
            - y: ndarray
                the actual time series

        OUTPUTS:
            - loss: float
                the loss function mean value, if ret_mean is True. Otherwise, the loss for each observation
        '''
        v, e, y = v.flatten()*100, e.flatten()*100, y.flatten()*100
        if self.ret_mean: #If true, return the mean of the loss
            loss = np.nanmean(
                np.where(y<=v, (y-v)/(self.theta*e), 0) + v/e + np.log(-e) - 1
            )
        else: #Otherwise, return the loss for each observation
            loss = np.where(y<=v, (y-v)/(self.theta*e), 0) + v/e + np.log(-e) - 1
        return loss




[docs]
class DMtest():
    '''
    Diebold-Mariano test for the equality of forecast accuracy. The null H0: E[loss_func(Q1, E1, Y)] == E[loss_func(Q2, E2, Y)] is tested.

    Parameters:
    ----------------
        - loss_func: callable
            the loss function to compute the forecast accuracy
        - h: int, optional
            the maximum lag to compute the autocovariance. Default is 1

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import DMtest, patton_loss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf_1 = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts of algorithm 1
        ef_1 = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts of algorithm 1
        qf_2 = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts of algorithm 2
        ef_2 = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts of algorithm 2
        theta = 0.05 #Set the desired confidence level

        DMtest(patton_loss(theta, ret_mean=False))(qf_1, ef_1, qf_2, ef_2, y) #Compute the Diebold Mariano test (with Patton loss)
    
    Methods:
    ----------------
    '''
    def __init__(self, loss_func, h = 1):
        self.loss_func = loss_func
        self.h = h

    def autocovariance(self, Xi, T, k, Xs):
        '''
        Compute the autocovariance of a time series

        INPUTS:
            - Xi: ndarray
            the time series
            - T: int
            the length of the time series
            - k: int
            the lag
            - Xs: float
            the mean of the time series

        OUTPUTS:
            - autoCov: float
                the autocovariance
        
        :meta private:
        '''
        autoCov = 0
        for i in np.arange(0, T-k):
            autoCov += ((Xi[i+k])-Xs)*(Xi[i]-Xs)
        autoCov = (1/T)*autoCov
        return autoCov
    

[docs]
    def __call__(self, Q1, E1, Q2, E2, Y):
        '''
        INPUTS:
            - Q1: ndarray
                the first set of quantile predictions
            - E1: ndarray
                the first set of expected shortfall predictions
            - Q2: ndarray
                the second set of quantile predictions
            - E2: ndarray
                the second set of expected shortfall predictions
            - Y: ndarray
                the actual time series

        OUTPUTS:
            - stat: float
                the test statistic
            - p_value: float
                the p-value of the test
            - mean_difference: float
                the mean difference of the losses
        '''
        import warnings
        from scipy.stats import t

        #Compute losses
        e1_lst = self.loss_func(Q1.flatten(), E1.flatten(), Y.flatten())
        e2_lst = self.loss_func(Q2.flatten(), E2.flatten(), Y.flatten())
        d_lst  = e1_lst - e2_lst
        # Clean NaN values, if any
        n = len(d_lst)
        d_lst = d_lst[~np.isnan(d_lst)]
        T = len(d_lst)
        if T < n:
            warnings.warn('There are NaN in the population! They have been removed.', UserWarning)
        if T == 0:
            warnings.warn('All values are NaN!', UserWarning)
            if np.sum(np.isnan(e1_lst)) == n:
                return {'stat':np.nan, 'p_value':0, 'mean_difference':np.inf}
            if np.sum(np.isnan(e2_lst)) == n:
                return {'stat':np.nan, 'p_value':0, 'mean_difference':-np.inf}
            else:
                return {'stat':np.nan, 'p_value':0, 'mean_difference':0}
        else:
            mean_d = np.mean(d_lst)
            
            # Find autocovariance and construct DM test statistics
            gamma = list()
            for lag in range(0, self.h):
                gamma.append(self.autocovariance(d_lst, T, lag, mean_d))
            V_d = (gamma[0] + 2*np.sum(gamma[1:]))/T
            DM_stat = mean_d / np.sqrt(V_d)
            harvey_adj = np.sqrt( (T+1-2*self.h + self.h*(self.h-1)/T) / T )
            DM_stat *= harvey_adj

            # Find p-value
            p_value = 2*t.cdf(-abs(DM_stat), df = T - 1)
            
            return {'stat':DM_stat, 'p_value':p_value, 'mean_difference':mean_d}




[docs]
def cr_t_test(errorsA, errorsB, train_len, test_len):
    '''
    Corrected resampled t-test for the equality of forecast accuracy. The null H0: E[errorsA] >= E[errorsB] is tested.

    INPUTS:
        - errorsA: ndarray
            the first set of forecast errors
        - errorsB: ndarray
            the second set of forecast errors
        - train_len: int
            the length of the training set
        - test_len: int
            the length of the test set

    OUTPUTS:
        - stat: float
            the test statistic
        - p_value: float
            the p-value of the test

    Example of usage
    ----------------
    .. code-block:: python
        import numpy as np
        from utils import cr_t_test, patton_loss

        theta = 0.05 #Set the desired confidence level
        train_len, test_len = 1250, 250 #Specify the dimension of train and test sets for each fold
        N_fold = 24 #Number of available folds

        # Compute the Patton losses for every fold
        loss_1, loss_2 = list(), list() #Initialize the losses lists
        for fold in range(N_fold):
            y = np.random.randn(test_len)*1e-2  #Replace with price returns
            qf_1 = np.random.uniform(-1, 0, test_len)  #Replace with quantile forecasts of algorithm 1
            ef_1 = np.random.uniform(-1, 0, test_len)  #Replace with expected shortfall forecasts of algorithm 1
            loss_1.append( patton_loss(theta)(qf_1, ef_1, y) ) #Compute the loss for algortihm 1

            qf_2 = np.random.uniform(-1, 0, test_len)  #Replace with quantile forecasts of algorithm 2
            ef_2 = np.random.uniform(-1, 0, test_len)  #Replace with expected shortfall forecasts of algorithm 2
            loss_2.append( patton_loss(theta)(qf_2, ef_2, y) ) #Compute the loss for algortihm 2

        cr_t_test(loss_1, loss_2, train_len, test_len) #Apply the test

    '''
    from scipy.stats import t as stud_t
    output = dict() #Initialize output
    J = len(errorsA) #Compute the number of folds
    if J != len(errorsB):
        raise ValueError('Both samples must have the same length!')
    if isinstance(errorsA, list):
        errorsA = np.array(errorsA)
    if isinstance(errorsB, list):
        errorsB = np.array(errorsB)
    mu_j = errorsA - errorsB #Vector of difference of generalization errors
    mu_hat = np.mean(mu_j) #Mean of the difference of generalization errors
    S2 = np.sum( (mu_j-mu_hat)**2 ) / (J-1) #In sample variance
    sigma2 = (1/J + test_len/train_len)*S2 #Adjusted variance
    output['stat'] = mu_hat / np.sqrt(sigma2)
    output['p_value'] = stud_t.cdf(output['stat'], J-1)
    return output



[docs]
class bootstrap_mean_test():
    '''
    Bootstrap test for assessing whenever mean of a sample is == or >= a target value

    Parameters:
    ----------------
            - mu_target: float
                the mean to test against
            - one_side: bool, optional
                if True, the test is one sided (i.e. H0: mu >= mu_target), otherwise it is two-sided (i.e. H0: mu == mu_target). Default is False
            - n_boot: int, optional
                the number of bootstrap replications. Default is 10_000
    '''
    def __init__(self, mu_target, one_side=False, n_boot=10_000):
        self.mu_target = mu_target
        self.one_side = one_side
        self.n_boot = n_boot
    
    def null_statistic(self, B_data):
        '''
        Compute the null statistic for the bootstrap sample

        INPUTS:
            - B_data: ndarray
                the bootstrap sample

        OUTPUTS:
            - stat: float
                the null statistic
        
        :meta private:
        '''
        return (np.mean(B_data) - self.obs_mean) * np.sqrt(self.n) / np.std(B_data)
    
    def statistic(self, data):
        '''
        Compute the test statistic for the original sample

        INPUTS:
            :data: ndarray
            the original sample

        OUTPUTS:
            - :float
                the test statistic

        :meta private:
        '''
        return (self.obs_mean - self.mu_target) * np.sqrt(self.n) / np.std(data)
    

[docs]
    def __call__(self, data, seed=None):
        '''
        Compute the test

        INPUTS:
            - data: ndarray
                the original sample
            - seed: int, optional
                the seed for the random number generator. Default is None

        OUTPUTS:
            - statistic: float
                the test statistic
            - p_value: float
                the p-value of the test
        '''
        np.random.seed(seed)

        self.obs_mean = np.mean(data)
        self.n = len(data)

        B_stats = list()
        for _ in range(self.n_boot):
            B_stats.append( self.null_statistic(
                np.random.choice(data, size=self.n, replace=True) ))
        B_stats = np.array(B_stats)
        self.B_stats = B_stats
        
        if self.one_side:
            obs = self.statistic(data)
            return {'statistic':obs, 'p_value':np.mean(B_stats < obs)}
        else:
            obs = np.abs(self.statistic(data))
            return {'statistic':self.statistic(data),
                    'p_value':np.mean((B_stats > obs) | (B_stats < -obs))}




[docs]
class McneilFrey_test(bootstrap_mean_test):
    '''
    McNeil-Frey test for assessing the goodness of the Expected Shortfall estimate, as described in:

    McNeil, A. J., & Frey, R. (2000). Estimation of tail-related risk measures for heteroscedastic financial time series: an extreme value approach. Journal of empirical finance, 7(3-4), 271-300.

    The null hypothesis is H0: the risk is not underestimated.

    Parameters:
    ----------------
        - one_side: bool, optional
            if True, the test is one sided (i.e. H0: mu >= mu_target). Default is False
        - n_boot: int, optional
            the number of bootstrap replications. Default is 10_000

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import McneilFrey_test

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts
        ef = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts

        McneilFrey_test(one_side=True)(qf, ef, y, seed=2) #Compute the McNeil-Frey test
    
    Methods:
    ----------------
    '''
    def __init__(self, one_side=False, n_boot=10_000):
        super().__init__(0, one_side, n_boot)
    
    def mnf_transform(self, Q, E, Y):
        '''
        Transform the data to compute the McNeil-Frey test

        INPUTS:
            :Q: ndarray
            the quantile estimates
            :E: ndarray
            the expected shortfall estimates
            :Y: ndarray
            the actual time series

        OUTPUTS:
            - :ndarray
                the transformed data

        :meta private:
        '''
        import warnings

        Q, E, Y = Q.flatten(), E.flatten(), Y.flatten() #Flatten the data
        output = (Y - E)[Y <= Q]
        n = len(output)
        output = output[~np.isnan(output)]
        if len(output) < n:
            warnings.warn('There are NaN in the population! They have been removed.', UserWarning)
        return output


[docs]
    def __call__(self, Q, E, Y, seed=None):
        '''
        Compute the test

        INPUTS:
            - Q: ndarray
                the quantile estimates
            - E: ndarray
                the expected shortfall estimates
            - Y: ndarray
                the actual time series
            - seed: int, optional
                the seed for the random number generator. Default is None

        OUTPUTS:
            - statistic: float
                the test statistic
            - p_value: float
                the p-value of the test
        '''
        return super().__call__( self.mnf_transform(Q, E, Y).flatten(), seed)




[docs]
class AS14_test(bootstrap_mean_test):
    '''
    Acerbi-Szekely test for assessing the goodness of the Expected Shortfall estimate, with both Z1 and Z2 statistics, as described in:

    Acerbi, C., & Szekely, B. (2014). Back-testing expected shortfall. Risk, 27(11), 76-81.

    The null hypothesis is H0: Q, E are the correct (latent) quantile and expected shortfall estimates for the observed time series Y.
    
    Parameters:
    ----------------
        - one_side: bool, optional
            if True, the test is one sided (i.e. H0: mu >= mu_target). Default is False
        - n_boot: int, optional
            the number of bootstrap replications. Default is 10_000

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import AS14_test

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf = np.random.uniform(-1, 0, 250)*1e-1  #Replace with quantile forecasts
        ef = np.random.uniform(-1, 0, 250)*1e-1  #Replace with expected shortfall forecasts
        theta = 0.05 #Set the desired confidence level

        # Compute the Acerbi-Szekely test with Z1 statistic
        AS14_test()(qf, ef, y, test_type='Z1', theta=theta, seed=2)
    
    Methods:
    ----------------
    '''
    def __init__(self, one_side=False, n_boot=10_000):
        super().__init__(-1, one_side, n_boot)
    
    def as14_transform(self, test_type, Q, E, Y, theta):
        '''
        Transform the data to compute the Acerbi-Szekely test

        INPUTS:
            :test_type: str
            the type of test to perform. It must be either 'Z1' or 'Z2'
            :Q: ndarray
            the quantile estimates
            :E: ndarray
            the expected shortfall estimates
            :Y: ndarray
            the actual time series
            :theta: float
            the threshold for the test

        OUTPUTS:
            - :ndarray
                the transformed data

        :meta private:
        '''
        import warnings

        Q, E, Y = Q.flatten(), E.flatten(), Y.flatten() #Flatten the data
        if test_type == 'Z1':
            output = (- Y/E)[Y <= Q]
        elif test_type == 'Z2':
            output = - Y * (Y <= Q) / (theta * E)
        else:
            raise ValueError(f'test_type {test_type} not recognized. It must be either Z1 or Z2')
        n = len(output)
        output = output[~np.isnan(output)]
        if len(output) < n:
            warnings.warn('There are NaN in the population! They have been removed.', UserWarning)
        return output


[docs]
    def __call__(self, Q, E, Y, theta, test_type='Z1', seed=None):
        '''
        Compute the test

        INPUTS:
            - Q: ndarray
                the quantile estimates
            - E: ndarray
                the expected shortfall estimates
            - Y: ndarray
                the actual time series
            - test_type: str, optional
                the type of test to perform. It must be either 'Z1' or 'Z2'. Default is 'Z1'
            - seed: int, optional
                the seed for the random number generator. Default is None
            
        OUTPUTS:
            - statistic: float
                the test statistic
            - p_value: float
                the p-value of the test
        '''
        return super().__call__( self.as14_transform(test_type, Q, E, Y, theta).flatten(), seed)




[docs]
class LossDiff_test(bootstrap_mean_test):
    '''
    Loss difference test to assess whenever the first sample of losses is statistically lower than the second. The null hypothesis is H0: E[loss(Q_new, E_new, Y)] >= E[loss(Q_bench, E_bench, Y)].

    Parameters:
    ----------------
        - loss: callable
            the loss function to compute the forecast accuracy
        - n_boot: int, optional
            the number of bootstrap replications. Default is 10_000

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import LossDiff_test, patton_loss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf_1 = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts of algorithm 1
        ef_1 = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts of algorithm 1
        qf_2 = np.random.uniform(-1, 0, 250)  #Replace with quantile forecasts of algorithm 2
        ef_2 = np.random.uniform(-1, 0, 250)  #Replace with expected shortfall forecasts of algorithm 2
        theta = 0.05 #Set the desired confidence level

        LossDiff_test(patton_loss(theta, ret_mean=False))(qf_1, ef_1, qf_2, ef_2, y) #Compute the Loss Difference test (with Patton loss)
    
    Methods:
    ----------------
    '''
    def __init__(self, loss, n_boot=10_000):
        super().__init__(0, True, n_boot)
        self.loss = loss
    
    def ld_transform(self, Q_new, E_new, Q_bench, E_bench, Y):
        '''
        Transform the data to compute the test

        INPUTS:
            :Q_new: ndarray
            the first set of quantile predictions
            :E_new: ndarray
            the first set of expected shortfall predictions
            :Q_bench: ndarray
            the second set of quantile predictions
            :E_bench: ndarray
            the second set of expected shortfall predictions
            :Y: ndarray
            the actual time series

        OUTPUTS:
            - :ndarray
                the transformed data

        :meta private:
        '''
        import warnings
        output = self.loss(Q_new, E_new, Y) - self.loss(Q_bench, E_bench, Y)
        n = len(output)
        output = output[~np.isnan(output)]
        if len(output) < n:
            warnings.warn('There are NaN in the population! They have been removed.', UserWarning)
        return output
    

[docs]
    def __call__(self, Q_new, E_new, Q_bench, E_bench, Y, seed=None):
        '''
        Compute the test

        INPUTS:
            - Q_new: ndarray
                the first set of quantile predictions
            - E_new: ndarray
                the first set of expected shortfall predictions
            - Q_bench: ndarray
                the second set of quantile predictions
            - E_bench: ndarray
                the second set of expected shortfall predictions
            - Y: ndarray
                the actual time series
            - seed: int, optional
                the seed for the random number generator. Default is None

        OUTPUTS:
            - statistic: float
                the test statistic
            - p_value: float
                the p-value of the test
        '''
        return super().__call__( self.ld_transform(
            Q_new, E_new, Q_bench, E_bench, Y).flatten(), seed)




[docs]
class Encompassing_test(bootstrap_mean_test):
    '''
    Encompassing test to assess whenever the first sample of losses is statistically lower than the second. As described in:

    Kışınbay, T. (2010). The use of encompassing tests for forecast combinations. Journal of Forecasting, 29(8), 715-727.

    The null hypothesis is H0: E[loss(Q_new, E_new, Y)] >= E[loss(Q_bench, E_bench, Y)].
    
    Parameters:
    ----------------
        - loss: callable
            the loss function to compute the forecast accuracy
        - n_boot: int, optional
            the number of bootstrap replications. Default is 10_000

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import Encompassing_test, patton_loss

        y = np.random.randn(250)*1e-2  #Replace with price returns
        qf_1 = np.random.uniform(-1, 0, 250)*1e-1  #Replace with quantile forecasts of algorithm 1
        ef_1 = np.random.uniform(-1, 0, 250)*1e-1  #Replace with expected shortfall forecasts of algorithm 1
        qf_2 = np.random.uniform(-1, 0, 250)*1e-1  #Replace with quantile forecasts of algorithm 2
        ef_2 = np.random.uniform(-1, 0, 250)*1e-1  #Replace with expected shortfall forecasts of algorithm 2
        theta = 0.05 #Set the desired confidence level

        Encompassing_test(patton_loss(theta, ret_mean=False))(qf_1, ef_1, qf_2, ef_2, y) #Compute the Encompassing test (with Patton loss)
    
    Methods:
    ----------------
    '''
    def __init__(self, loss, n_boot=10_000):
        super().__init__(0, True, n_boot)
        self.loss = loss

    def en_transform(self, Q_new, E_new, Q_bench, E_bench, Y):
        '''
        Transform the data to compute the test

        INPUTS:
            :Q_new: ndarray
            the first set of quantile predictions
            :E_new: ndarray
            the first set of expected shortfall predictions
            :Q_bench: ndarray
            the second set of quantile predictions
            :E_bench: ndarray
            the second set of expected shortfall predictions
            :Y: ndarray
            the actual time series

        OUTPUTS:
            - :ndarray
                the transformed data

        :meta private:
        '''
        import warnings
        from scipy.optimize import minimize

        # Flatten the arrays
        Q_new, E_new, Q_bench, E_bench, Y = Q_new.flatten(), E_new.flatten(), Q_bench.flatten(), E_bench.flatten(), Y.flatten()

        # Split into train and test sets
        train_size = Q_new.shape[0]//2
        Q_new_train, E_new_train = Q_new[:train_size], E_new[:train_size]
        Q_new_test, E_new_test = Q_new[train_size:], E_new[train_size:]
        Q_bench_train, E_bench_train = Q_bench[:train_size], E_bench[:train_size]
        Q_bench_test, E_bench_test = Q_bench[train_size:], E_bench[train_size:]
        Y_train, Y_test = Y[:train_size], Y[train_size:]

        #Fit the linear model
        bounds = [(0,1), (0,1)]
        alpha = minimize(lambda x: np.nanmean(self.loss(
            Q_new_train*x[0] + Q_bench_train*x[1],
            E_new_train*x[0] + E_bench_train*x[1], Y_train)),
                        [0.5, 0.5], bounds=bounds, method='SLSQP',
                        options={'disp': False}, tol=1e-6).x
        
        # Compute the population
        output = self.loss(Q_new_test, E_new_test, Y_test) - self.loss(
            Q_new_test*alpha[0] + Q_bench_test*alpha[1],
            E_new_test*alpha[0] + E_bench_test*alpha[1], Y_test)
        n = len(output)
        output = output[~np.isnan(output)]
        if len(output) < n:
            warnings.warn('There are NaN in the population! They have been removed.', UserWarning)
        return output
        

[docs]
    def __call__(self, Q_new, E_new, Q_bench, E_bench, Y, seed=None):
        '''

        INPUTS:
            - Q_new: ndarray
                the first set of quantile predictions
            - E_new: ndarray
                the first set of expected shortfall predictions
            - Q_bench: ndarray
                the second set of quantile predictions
            - E_bench: ndarray
                the second set of expected shortfall predictions
            - Y: ndarray
                the actual time series
            - seed: int, optional
                the seed for the random number generator. Default is None

        OUTPUTS:
            - statistic: float
                the test statistic
            - p_value: float
                the p-value of the test
        '''
        return super().__call__( self.en_transform(
            Q_new, E_new, Q_bench, E_bench, Y).flatten(), seed)


    

[docs]
def gaussian_tail_stats(theta, loc=0, scale=1):
    '''
    Compute the Value at Risk and Expected Shortfall for a Gaussian distribution

    INPUTS:
        - theta: float
            the quantile to compute the statistics
        - loc: ndarray, optional
            the mean of the distribution
        - scale: ndarray, optional
            the standard deviation of the distribution

    OUTPUTS:
        - var: ndarray
            the Value at Risk for a normal distribution with mean=loc and standard deviation=scale
        - es: ndarray
            the Expected Shortfall for a normal distribution with mean=loc and standard deviation=scale

    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import gaussian_tail_stats

        res = gaussian_tail_stats(theta=0.05, loc=0, scale=1e-2) #Compute the VaR and the Expected Shortfall
        print('VaR =', res['var'], '    ES =', res['es'])
    
    '''
    from scipy.stats import norm
    # If working with scalar, convert to numpy array
    if isinstance(loc, (int, float)):
        loc = np.array([loc])
    if isinstance(scale, (int, float)):
        scale = np.array([scale])
    
    # Raise error if the dimensions do not match
    if loc.shape != scale.shape:
        raise ValueError(f'loc and scale must have the same dimensions!\nFound loc={loc.shape} and scale={scale.shape}')

    # Compute the Expected Shortfall
    var = np.zeros(len(loc))
    es = np.zeros(len(loc))
    for t in range(len(loc)):
        es[t] = loc[t] - scale[t]*norm.pdf(norm.ppf(1-theta))/theta
        var[t] = loc[t] + scale[t]*norm.ppf(theta)
    # If the input was a scalar, return scalars
    if len(var) == 1:
        return {'var':var[0], 'es':es[0]}
    else:
        return {'var':var, 'es':es}



[docs]
def tstudent_tail_stats(theta, df, loc=0, scale=1):
    '''
    Compute the Value at Risk and Expected Shortfall for a Student's t distribution

    INPUTS:
        - theta: float
            the quantile to compute the statistics
        - df: int
            the degrees of freedom of the distribution
        - loc: ndarray, optional
            the mean of the distribution
        - scale: ndarray, optional
            the standard deviation of the distribution

    OUTPUTS:
        - var: ndarray
            the Value at Risk for the t-distribution
        - es: ndarray
            the Expected Shortfall for the t-distribution
    
    Example of usage
    ----------------
    .. code-block:: python

        import numpy as np
        from utils import tstudent_tail_stats

        res = tstudent_tail_stats(theta=0.05, df=5, loc=0, scale=1e-2) #Compute the VaR and the Expected Shortfall
        print('VaR =', res['var'], '    ES =', res['es'])
    
    '''
    from scipy.stats import t as t_dist
    from scipy.special import gamma as gamma_func

    # If working with scalar, convert to numpy array
    if isinstance(loc, (int, float)):
        loc = np.array([loc])
    if isinstance(scale, (int, float)):
        scale = np.array([scale])
    
    # Raise error if the dimensions do not match
    if loc.shape != scale.shape:
        raise ValueError('loc and scale must have the same dimensions!')

    # Compute the Expected Shortfall
    cte = gamma_func((df+1)/2) / (np.sqrt(np.pi*df)*gamma_func(df/2))
    var = np.zeros(len(loc))
    es = np.zeros(len(loc))
    for t in range(len(loc)):
        var[t] = t_dist.ppf(theta, df=df, loc=0, scale=1)
        tau = cte * (1 + var[t]**2/df)**(-(1+df)/2)
        es[t] = loc[t] - scale[t] * (df + var[t]**2) * tau / ( (df-1) * theta)
        var[t] = loc[t] + var[t] * scale[t]
    # If the input was a scalar, return scalars
    if len(var) == 1:
        return {'var':var[0], 'es':es[0]}
    else:
        return {'var':var, 'es':es}