Coverage for tcvx21/analysis/statistics_m.py: 94%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import xarray as xr
2from numpy.random import default_rng
3from tcvx21.units_m import Dimensionless
4from typing import Union
5from scipy import stats
6import numpy as np
7import sys
10def strip_moment(observable_key: str):
11 """Convert a observable name to a base observable and a statistical moment"""
13 if observable_key.endswith('_std'):
14 moment = 'std'
15 key = observable_key.rstrip('std').rstrip('_')
16 elif observable_key.endswith('_skew'):
17 moment = 'skew'
18 key = observable_key.rstrip('skew').rstrip('_')
19 elif observable_key.endswith('_kurtosis'):
20 moment = 'kurt'
21 key = observable_key.rstrip('kurtosis').rstrip('_')
22 else:
23 moment = 'mean'
24 key = observable_key
26 return key, moment
29def compute_statistical_moment(values, moment: str = 'mean', dimension=('tau', 'phi')):
30 """Compute the statistical moment"""
31 if moment == 'mean':
32 return values.mean(dim=dimension)
33 elif moment == 'std':
34 return values.std(dim=dimension)
35 elif moment == 'skew':
36 return skew(values, dim=dimension)
37 elif moment == 'kurt':
38 return kurtosis(values, dim=dimension, fisher=False)
39 elif moment == 'excess_kurt':
40 return kurtosis(values, dim=dimension, fisher=True)
41 else:
42 raise NotImplementedError(f"No implementation for {moment}")
45def compute_statistical_moment_with_bootstrap(values, moment: str = 'mean', n_tests: int = 100, ci=0.95, random_seed=None):
46 """
47 Return the statistical moment and an estimate of its uncertainty due to finite
48 sample size effects
49 """
51 assert moment in ['mean', 'std', 'skew', 'kurt', 'excess_kurt']
53 if "pytest" in sys.modules:
54 # Use a constant seed if using pytest, otherwise tests won't be reproducible
55 rng = default_rng(seed=12345)
56 else:
57 rng = default_rng(seed=random_seed)
59 stacked_values = values.stack(samples=('tau', 'phi'))
61 n_points = stacked_values.sizes['points']
62 n_samples = np.round(stacked_values.sizes['samples']).astype(int)
64 random_sample = rng.integers(low=0, high=stacked_values.sizes['samples'],
65 size=(n_points, n_tests, n_samples))
67 random_sample = xr.DataArray(random_sample,
68 dims=('points', 'tests', 'samples'))
70 sampled_values = stacked_values.isel(samples=random_sample)
72 test_moment = compute_statistical_moment(sampled_values, moment=moment, dimension='samples')
74 # What fraction of points should we exclude as outside our confidence interval (take factor of two since
75 # we calculate a two-sided confidence interval).
77 fraction_excluded = (1.0-ci)/2.0
79 low, centre, high = test_moment.quantile(q=[fraction_excluded, 0.5, 1-fraction_excluded], dim='tests')
81 return centre, np.abs(high-low)
84def skew(input_array: xr.DataArray, dim: Union[str, tuple] = ('tau', 'phi'),
85 bias: bool = True, nan_policy: str = 'propagate') -> xr.DataArray:
86 """
87 Calculates the sample skewness of a given xarray (Fisher-Pearson coefficient of skewness)
89 For normally distributed data, the skewness should be about zero. For unimodal continuous distributions,
90 a skewness value greater than zero means that there is more weight in the right tail of the distribution.
92 Unlike the standard scipy.stats version, this function can s
94 See https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html for kwargs
96 Note that the skew is dimensionless
97 """
99 assert isinstance(dim, str) or isinstance(dim, tuple),\
100 f"dim must be either str or tuple, but was {dim} of type {type(dim)}"
101 if isinstance(dim, tuple) and len(dim) > 1:
102 # Can only supply a single dimension to reduce with scipy.stats. To circumvent this, we
103 # 'stack' the multidimensions into a single dimension
104 input_array = input_array.stack(stacked_dim=dim)
105 dim = 'stacked_dim'
107 skew_array = input_array.reduce(
108 func=stats.skew,
109 dim=dim,
110 keep_attrs=True,
111 bias=bias,
112 nan_policy=nan_policy
113 )
115 skew_array.attrs['norm'] = Dimensionless
116 skew_array.attrs['units'] = ''
117 return skew_array
120def kurtosis(input_array: xr.DataArray, dim: Union[str, tuple] = ('tau', 'phi'),
121 fisher: bool = False, bias: bool = True, nan_policy: str = 'propagate'):
122 """
123 Calculates the sample kurtosis of a given xarray
125 Kurtosis is the fourth central moment divided by the square of the variance.
126 In Pearson's definition (fisher = False), this value is returned directly
127 In Fisher’s definition (fisher = True), 3.0 is subtracted from the result to give 0.0 for a normal distribution.
128 (Fisher's definition is typically termed the "excess kurtosis")
130 https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kurtosis.html
132 Note that the kurtosis is dimensionless
133 """
135 assert isinstance(dim, str) or isinstance(dim, tuple),\
136 f"dim must be either str or tuple, but was {dim} of type {type(dim)}"
137 if isinstance(dim, tuple) and len(dim) > 1:
138 # Can only supply a single dimension to reduce with scipy.stats. To circumvent this, we
139 # 'stack' the multidimensions into a single dimension
140 input_array = input_array.stack(stacked_dim=dim)
141 dim = 'stacked_dim'
143 kurt_array = input_array.reduce(
144 func=stats.kurtosis,
145 dim=dim,
146 keep_attrs=True,
147 fisher=fisher,
148 bias=bias,
149 nan_policy=nan_policy
150 )
152 kurt_array.attrs['norm'] = Dimensionless
153 kurt_array.attrs['units'] = ''
154 return kurt_array