Coverage for tcvx21/analysis/statistics_m.py: 94%

65 statements  

import xarray as xr
from numpy.random import default_rng
from tcvx21.units_m import Dimensionless
from typing import Union
from scipy import stats
import numpy as np
import sys


def strip_moment(observable_key: str):
    """Convert an observable name into a base observable and a statistical moment"""

    if observable_key.endswith('_std'):
        moment = 'std'
        # rstrip removes trailing characters from a *set*, not a suffix: the '_' (removed by the
        # second rstrip) acts as a barrier, so only the moment suffix is stripped from the key
        key = observable_key.rstrip('std').rstrip('_')
    elif observable_key.endswith('_skew'):
        moment = 'skew'
        key = observable_key.rstrip('skew').rstrip('_')
    elif observable_key.endswith('_kurtosis'):
        moment = 'kurt'
        key = observable_key.rstrip('kurtosis').rstrip('_')
    else:
        moment = 'mean'
        key = observable_key

    return key, moment
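A quick usage sketch of strip_moment; the observable keys below are illustrative examples, not names taken from the module:

# Usage sketch: map observable keys to (base key, moment) pairs
assert strip_moment('density_std') == ('density', 'std')
assert strip_moment('density_kurtosis') == ('density', 'kurt')
assert strip_moment('density') == ('density', 'mean')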

def compute_statistical_moment(values, moment: str = 'mean', dimension=('tau', 'phi')):
    """Compute the statistical moment"""
    if moment == 'mean':
        return values.mean(dim=dimension)
    elif moment == 'std':
        return values.std(dim=dimension)
    elif moment == 'skew':
        return skew(values, dim=dimension)
    elif moment == 'kurt':
        return kurtosis(values, dim=dimension, fisher=False)
    elif moment == 'excess_kurt':
        return kurtosis(values, dim=dimension, fisher=True)
    else:
        raise NotImplementedError(f"No implementation for {moment}")
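A minimal usage sketch, assuming the module's imports and the skew/kurtosis helpers defined further down; the dimension sizes and the normal test data are arbitrary choices matching the default ('tau', 'phi') reduction:

# Usage sketch: reduce a synthetic (tau, phi) field to each supported moment
rng = default_rng(seed=0)
data = xr.DataArray(rng.normal(size=(500, 4)), dims=('tau', 'phi'))

for m in ('mean', 'std', 'skew', 'kurt', 'excess_kurt'):
    print(m, float(compute_statistical_moment(data, moment=m)))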

def compute_statistical_moment_with_bootstrap(values, moment: str = 'mean',
                                               n_tests: int = 100, ci=0.95, random_seed=None):
    """
    Return the statistical moment and an estimate of its uncertainty due to finite
    sample size effects
    """

    assert moment in ['mean', 'std', 'skew', 'kurt', 'excess_kurt']

    if "pytest" in sys.modules:
        # Use a constant seed if using pytest, otherwise tests won't be reproducible
        rng = default_rng(seed=12345)
    else:
        rng = default_rng(seed=random_seed)

    stacked_values = values.stack(samples=('tau', 'phi'))

    n_points = stacked_values.sizes['points']
    n_samples = np.round(stacked_values.sizes['samples']).astype(int)

    # Draw bootstrap resamples: for each point, n_tests sets of n_samples indices
    # selected with replacement from the available samples
    random_sample = rng.integers(low=0, high=stacked_values.sizes['samples'],
                                 size=(n_points, n_tests, n_samples))

    random_sample = xr.DataArray(random_sample,
                                 dims=('points', 'tests', 'samples'))

    sampled_values = stacked_values.isel(samples=random_sample)

    test_moment = compute_statistical_moment(sampled_values, moment=moment, dimension='samples')

    # Fraction of points to exclude on each side of the confidence interval (take a factor of
    # two since we calculate a two-sided confidence interval)
    fraction_excluded = (1.0 - ci) / 2.0

    low, centre, high = test_moment.quantile(q=[fraction_excluded, 0.5, 1 - fraction_excluded], dim='tests')

    return centre, np.abs(high - low)
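A usage sketch for the bootstrap estimate; the input must carry 'points', 'tau' and 'phi' dimensions, since the function stacks ('tau', 'phi') into samples and keeps 'points' as the spatial axis. The sizes and the normal test data below are arbitrary assumptions:

# Usage sketch: bootstrap confidence interval on the mean of a synthetic signal
rng = default_rng(seed=0)
values = xr.DataArray(rng.normal(size=(5, 200, 8)), dims=('points', 'tau', 'phi'))

centre, uncertainty = compute_statistical_moment_with_bootstrap(values, moment='mean', n_tests=50)
print(centre.values)       # bootstrap median of the mean, one value per point
print(uncertainty.values)  # width of the 95% confidence interval, one value per point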

def skew(input_array: xr.DataArray, dim: Union[str, tuple] = ('tau', 'phi'),
         bias: bool = True, nan_policy: str = 'propagate') -> xr.DataArray:
    """
    Calculates the sample skewness of a given xarray (Fisher-Pearson coefficient of skewness)

    For normally distributed data, the skewness should be about zero. For unimodal continuous distributions,
    a skewness value greater than zero means that there is more weight in the right tail of the distribution.

    Unlike the standard scipy.stats version, this function can reduce over several dimensions at once
    (by stacking them into a single dimension)

    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html for kwargs

    Note that the skew is dimensionless
    """

    assert isinstance(dim, str) or isinstance(dim, tuple), \
        f"dim must be either str or tuple, but was {dim} of type {type(dim)}"
    if isinstance(dim, tuple) and len(dim) > 1:
        # Only a single dimension can be passed to reduce with scipy.stats. To circumvent this, we
        # 'stack' the multiple dimensions into a single dimension
        input_array = input_array.stack(stacked_dim=dim)
        dim = 'stacked_dim'

    skew_array = input_array.reduce(
        func=stats.skew,
        dim=dim,
        keep_attrs=True,
        bias=bias,
        nan_policy=nan_policy
    )

    skew_array.attrs['norm'] = Dimensionless
    skew_array.attrs['units'] = ''
    return skew_array

def kurtosis(input_array: xr.DataArray, dim: Union[str, tuple] = ('tau', 'phi'),
             fisher: bool = False, bias: bool = True, nan_policy: str = 'propagate'):
    """
    Calculates the sample kurtosis of a given xarray

    Kurtosis is the fourth central moment divided by the square of the variance.
    In Pearson's definition (fisher = False), this value is returned directly.
    In Fisher's definition (fisher = True), 3.0 is subtracted from the result to give 0.0 for a normal distribution.
    (Fisher's definition is typically termed the "excess kurtosis")

    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html

    Note that the kurtosis is dimensionless
    """

    assert isinstance(dim, str) or isinstance(dim, tuple), \
        f"dim must be either str or tuple, but was {dim} of type {type(dim)}"
    if isinstance(dim, tuple) and len(dim) > 1:
        # Only a single dimension can be passed to reduce with scipy.stats. To circumvent this, we
        # 'stack' the multiple dimensions into a single dimension
        input_array = input_array.stack(stacked_dim=dim)
        dim = 'stacked_dim'

    kurt_array = input_array.reduce(
        func=stats.kurtosis,
        dim=dim,
        keep_attrs=True,
        fisher=fisher,
        bias=bias,
        nan_policy=nan_policy
    )

    kurt_array.attrs['norm'] = Dimensionless
    kurt_array.attrs['units'] = ''
    return kurt_array
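
A small consistency sketch for the two wrappers, assuming the module's imports; the exponential test data is an arbitrary choice with non-zero skew, and the checks simply compare against scipy.stats applied to the flattened values:

# Usage sketch: the xarray wrappers should agree with scipy.stats on the flattened samples
rng = default_rng(seed=0)
data = xr.DataArray(rng.exponential(size=(1000, 4)), dims=('tau', 'phi'))

flat = data.values.ravel()
assert np.isclose(float(skew(data)), stats.skew(flat))
assert np.isclose(float(kurtosis(data)), stats.kurtosis(flat, fisher=False))
# Excess kurtosis (Fisher) is Pearson kurtosis minus 3
assert np.isclose(float(kurtosis(data, fisher=True)), float(kurtosis(data)) - 3.0)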