Coverage for tcvx21/quant_validation/ricci_metric_m.py: 94%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Routines for calculating the Ricci validation metric
3"""
4import warnings
6import numpy as np
7import pandas as pd
8from collections import namedtuple
10from tcvx21 import Quantity
11from tcvx21.record_c import Record
12from tcvx21.observable_c import Observable
def compute_comparison_data(experiment: Observable, simulation: Observable):
    """
    Builds the dictionary of values and errors needed by the metric calculations.

    The simulation observable is mapped onto the experimental measurement
    positions, and a mask selects the experimental points that the simulation
    actually covers.
    """
    exp_dim, sim_dim = experiment.dimensionality, simulation.dimensionality

    if exp_dim == 0 and sim_dim == 0:
        # Scalar comparison: nothing to interpolate, the single point always "overlaps"
        mapped_simulation, mask = simulation, [True]
    elif exp_dim == 1 and sim_dim == 1:
        mask = simulation.points_overlap(experiment.positions)
        mapped_simulation = simulation.interpolate_onto_positions(experiment.positions[mask])
    elif exp_dim == 2 and sim_dim == 2:
        mapped_simulation = simulation.interpolate_onto_reference(experiment)
        mask = mapped_simulation.mask
    else:
        raise NotImplementedError(f"No implementation for comparing {type(experiment)} and {type(simulation)}")

    return {
        'experimental_value': experiment.values[mask],
        'experimental_error': experiment.errors[mask],
        'simulation_value': mapped_simulation.values,
        'simulation_error': mapped_simulation.errors,
    }
def calculate_normalised_ricci_distance_from_observables(experiment: Observable, simulation: Observable):
    """
    Computes the d value between an experiment and a simulation
    """
    # Map the simulation onto the experimental points, then evaluate the distance
    comparison_data = compute_comparison_data(experiment, simulation)
    return calculate_normalised_ricci_distance(**comparison_data)
def calculate_normalised_ricci_distance(experimental_value: Quantity, experimental_error: Quantity,
                                        simulation_value: Quantity, simulation_error: Quantity,
                                        debug: bool = False):
    """
    Calculates the error-normalised Ricci distance between an experimental signal and a simulation.

    Note that the simulation and experiment values must be arrays at the same position. To achieve this,
    call interpolate_onto_experimental_points and pass the returned mapped simulation values, plus the experimental
    values with the returned mask.

    The `debug` flag is currently unused; it is kept for interface compatibility.
    """
    N_points = experimental_value.size
    # All four arrays must be aligned point-by-point for the elementwise sum below.
    # Bug fix: the original check tested simulation_value.size twice and never
    # checked simulation_error.size.
    other_sizes = [experimental_error.size, simulation_value.size, simulation_error.size]
    assert np.all(np.array(other_sizes) == N_points), f"{[N_points] + other_sizes}"

    # Mean squared difference, normalised by the combined (quadrature) uncertainty;
    # .to('') asserts the result is dimensionless and strips the units.
    return np.sqrt(1.0 / N_points * np.sum(
        (experimental_value - simulation_value) ** 2 /
        (experimental_error ** 2 + simulation_error ** 2),
    )).to('').magnitude
def calculate_ricci_sensitivity(experimental_value: Quantity, experimental_error: Quantity,
                                simulation_value: Quantity, simulation_error: Quantity,
                                epsilon: float = 1E-3):
    """
    Calculates the Ricci sensitivity, which is a measure of the total uncertainty relative to the magnitude of
    the values.

    Note that the simulation and experiment values must be arrays at the same position. To achieve this,
    call interpolate_onto_experimental_points and pass the returned mapped simulation values, plus the experimental
    values with the returned mask.

    NOTE(review): the original signature declared an anonymous parameter `float = 1E-3`,
    which shadowed the builtin `float`. It is renamed to `epsilon` here; it is unused in
    the body and retained only so existing positional calls keep working — confirm whether
    it can be dropped entirely.
    """
    # exp(- total error / total signal): sensitivity approaches 1 for small
    # relative uncertainty and 0 for large; .to('') asserts dimensionlessness.
    return np.exp(-1 *
                  (np.sum(np.abs(experimental_error) + np.abs(simulation_error))) /
                  (np.sum(np.abs(experimental_value) + np.abs(simulation_value)))
                  ).to('').magnitude
def level_of_agreement_function(distance: float, agreement_threshold=1.0, transition_sharpness=0.5) -> float:
    """
    Calculates the level of agreement (R) from the normalised distance (d).

    A tanh sigmoid maps the distance onto [0, 1]: values of d well below the
    agreement threshold give R near 0, values well above give R near 1, and
    transition_sharpness controls the width of the transition.
    """
    argument = (distance - 1.0 / distance - agreement_threshold) / transition_sharpness
    return 0.5 * (np.tanh(argument) + 1.0)
class RicciValidation:

    def __init__(self, experimental_dataset: Record, simulation_dataset: Record):
        """
        Stores a single experiment and a single simulation, to allow easy iteration
        over all stored elements.
        """
        self.experiment = experimental_dataset
        self.simulation = simulation_dataset

        # Observables with no experimental data cannot be compared, so drop them
        self.keys = [
            key for key in self.experiment.keys()
            if not experimental_dataset.get_observable(*key).is_empty
        ]

        self.distance = {}
        self.hierarchy = {}
        self.sensitivity = {}

        for diagnostic, observable in self.keys:
            # Distance and sensitivity stay NaN until calculate_metric_terms fills them
            self.distance.setdefault(diagnostic, {})[observable] = np.nan
            self.sensitivity.setdefault(diagnostic, {})[observable] = np.nan

            # Combined hierarchy of the experimental and simulation entries
            ds_entry = self.simulation.get_nc_group(diagnostic, observable)
            combined_hierarchy = ds_entry.experimental_hierarchy + ds_entry.simulation_hierarchy - 1
            self.hierarchy.setdefault(diagnostic, {})[observable] = combined_hierarchy

    def calculate_metric_terms(self):
        """
        Compute the terms which are used for the metric
        """
        for key in self.keys:
            diagnostic, observable = key
            experiment = self.experiment.get_observable(*key)
            simulation = self.simulation.get_observable(*key)

            if simulation.is_empty:
                # No simulation data: leave the NaN placeholders untouched
                continue

            comparison_data = compute_comparison_data(experiment, simulation)
            self.distance[diagnostic][observable] = calculate_normalised_ricci_distance(**comparison_data)
            self.sensitivity[diagnostic][observable] = calculate_ricci_sensitivity(**comparison_data)

    def compute_chi(self, agreement_threshold: float = 1.0, transition_sharpness: float = 0.5):
        """
        Calculates the 'chi' validation metric and 'Q' quality factor
        """
        numerator, denominator = 0.0, 0.0

        for diagnostic, observable in self.keys:
            distance = self.distance[diagnostic][observable]
            hierarchy = self.hierarchy[diagnostic][observable]
            sensitivity = self.sensitivity[diagnostic][observable]

            if np.any(np.isnan(distance)) or np.any(np.isnan(sensitivity)):
                # Penalise missing data. This is a harsh penalty, but acts as a strong disincentive to
                # drop data which would otherwise increase a comparisons chi. This way, including any data is guaranteed
                # to improve the chi value compared to leaving a field blank.
                R = S = H = 1.0
            else:
                R = level_of_agreement_function(distance, agreement_threshold, transition_sharpness)
                S = sensitivity
                H = 1.0 / hierarchy

            numerator += R * H * S
            denominator += H * S

        # chi is the weighted mean level of agreement; Q is the total weight
        return numerator / denominator, denominator

    def as_dataframe(self, agreement_threshold: float = 1.0, transition_sharpness: float = 0.5):
        """
        Returns a dataframe of the calculation
        """
        Row = namedtuple('Row', ('distance', 'hierarchy', 'sensitivity'))

        index_tuples, rows = [], []
        for diagnostic, observable in self.keys:
            index_tuples.append((diagnostic, observable))
            rows.append(Row(
                self.distance[diagnostic][observable],
                self.hierarchy[diagnostic][observable],
                self.sensitivity[diagnostic][observable],
            ))

        index = pd.MultiIndex.from_tuples(index_tuples,
                                          names=["diagnostic", "observable"])

        return pd.DataFrame(rows, index=index)