Source code for phise.modules.test_statistics

"""Test statistics to compare two samples (H0 vs H1).

This module provides simple statistics (mean, median, AUC of distances,
etc.) as well as non-parametric tests (SciPy) to measure the separability
between two data distributions.
"""
from __future__ import annotations

import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['image.origin'] = 'lower'
from copy import deepcopy as copy
import astropy.units as u
from scipy import stats


[docs] def get_vectors(ctx=None, nmc: int = 1000, size: int = 1000, progress_callback=None, flatten: bool = True, randomize_position: bool = False): """Generate two sets of statistic vectors under H0 and H1. This simulates observations with and without companion(s) to build statistic arrays of shape ``(nb_processed_outputs, nmc, size)`` under H0 (no companion) and H1 (with companions), then concatenates each set along the first axis. Notes: - Assumes a compatible ``Context`` object exists (see ``phise.classes.context.Context``) and relies on its ``observe()`` method. - To avoid circular imports, the ``Context`` import is local and only used if ``ctx`` is ``None``. Args: ctx: Observation context (if ``None``, a default VLTI context is instantiated). The context must contain at least one companion to generate H1. nmc: Number of Monte-Carlo realizations. size: Number of samples per realization. progress_callback (callable, optional): function accepting a float (0-1) representing the progress. flatten (bool, optional): If True, concatenates independent kernel outputs. If False, returns shapes (nb_processed_outputs, nmc, size). Defaults to True. randomize_position (bool, optional): If True, randomizes companion position (uniform in FOV) for each sample. If False, uses the fixed position defined in `ctx`. Defaults to False. Returns: Tuple ``(T0, T1)`` where: - T0: Concatenated vectors under H0, shape ``(nb_processed_outputs * nmc * size,)`` if flatten=True. - T1: Concatenated vectors under H1, shape ``(nb_processed_outputs * nmc * size,)`` if flatten=True. Raises: ValueError: If ``ctx`` contains no companions. """ if ctx is None: # import local pour éviter un import circulaire lors du chargement from phise.examples.VLTI_context import get_VLTI ctx = get_VLTI() ctx.interferometer.chip.σ = np.zeros_like(ctx.interferometer.chip.σ.value) * u.m if ctx.target.companions == []: raise ValueError( 'No companions in the context. Please add companions to the context before generating vectors.' ) ctx_h1 = copy(ctx) ctx_h0 = copy(ctx) ctx_h0.target.companions = [] nb_proc = ctx.interferometer.chip.nb_processed_outputs T0 = np.zeros((nb_proc, nmc, size)) T1 = np.zeros((nb_proc, nmc, size)) fov = ctx.interferometer.fov.to(u.mas).value for i in range(nmc): if progress_callback: progress_callback(i / nmc) else: print(f' Generating vectors... {round(i / nmc * 100, 2)}%', end='\r') for j in range(size): if randomize_position: for c in ctx_h1.target.companions: c.θ = np.random.uniform(0, 2 * np.pi) * u.rad c.ρ = np.random.uniform(fov / 10, fov) * u.mas outs_h0 = ctx_h0.observe() outs_h1 = ctx_h1.observe() k_h0 = ctx_h0.interferometer.chip.process_outputs(outs_h0) k_h1 = ctx_h1.interferometer.chip.process_outputs(outs_h1) T0[:, i, j] = k_h0 T1[:, i, j] = k_h1 if progress_callback: progress_callback(1.0) else: print(' Vectors generation complete') if flatten: return (np.concatenate(T0), np.concatenate(T1)) else: return (T0, T1)
[docs] def mean(u, v): """Absolute value of the mean of ``u``. Args: u: Samples under H1 (or any real/complex vector). v: Samples under H0 (unused). Returns: float: Absolute value of ``mean(u)``. """ return np.abs(np.mean(u))
[docs] def median(u, v): """Absolute value of the median of ``u``. Args: u: Samples under H1. v: Samples under H0 (unused). Returns: float: Absolute value of ``median(u)``. """ return np.abs(np.median(u))
[docs] def argmax(u, v, bins: int = 100): """Approximate mode of ``u`` using a histogram (most frequent bin). Args: u: Samples under H1. v: Samples under H0 (unused). bins: Number of histogram bins. Returns: float: Absolute center value of the bin maximizing the histogram. """ (hist, bin_edges) = np.histogram(u, bins=bins) bin_edges = (bin_edges[1:] + bin_edges[:-1]) / 2 return np.abs(bin_edges[np.argmax(hist)])
[docs] def argmax50(u, v): """Shortcut for ``argmax(u, v, bins=50)``.""" return argmax(u, v, 50)
[docs] def argmax100(u, v): """Shortcut for ``argmax(u, v, bins=100)``.""" return argmax(u, v, 100)
[docs] def argmax500(u, v): """Shortcut for ``argmax(u, v, bins=500)``.""" return argmax(u, v, 500)
[docs] def kolmogorov_smirnov(u, v): """Two-sample Kolmogorov–Smirnov statistic. Returns: float: Absolute value of ``D``, the maximal distance between empirical CDFs. """ return np.abs(stats.ks_2samp(u, v).statistic)
[docs] def cramer_von_mises(u, v): """Two-sample Cramér–von Mises statistic. Returns: float: Absolute value of the statistic. """ return np.abs(stats.cramervonmises_2samp(u, v).statistic)
[docs] def mannwhitneyu(u, v): """Mann–Whitney U statistic (Wilcoxon rank-sum). Returns: float: Absolute value of the statistic. """ return np.abs(stats.mannwhitneyu(u, v).statistic)
[docs] def wilcoxon_mann_whitney(u, v): """Wilcoxon signed-rank test statistic (paired samples). Returns: float: Absolute value of the statistic. """ return np.abs(stats.wilcoxon(u, v).statistic)
[docs] def anderson_darling(u, v): """Anderson–Darling k-sample statistic. Returns: float: Absolute value of the statistic. """ return np.abs(stats.anderson_ksamp([u, v]).statistic)
[docs] def brunner_munzel(u, v): """Brunner–Munzel test statistic. Returns: float: Absolute value of the statistic. """ return np.abs(stats.brunnermunzel(u, v).statistic)
[docs] def wasserstein_distance(u, v): """Wasserstein distance (earth mover's distance, order 1). Returns: float: Absolute value of the Wasserstein distance ``W1(u, v)``. """ return np.abs(stats.wasserstein_distance(u, v))
[docs] def flattening(u, v): """Sum of absolute deviations from the median. Returns: float: Measure of "flattening" around the median. """ med = np.median(u) return np.sum(np.abs(u - med))
[docs] def shift_and_flattening(u, v): """Area under the curve of sorted distances to the median (AUC). Definition: if ``d_i = abs(u_i - median(u))`` sorted, returns the area of ``d(x) + abs(median)`` over ``x in [0, 1]``. Returns: float: Area under the curve approximated via numerical integration. """ med = np.median(u) distances = np.sort(np.abs(u - med)) x = np.linspace(0, 1, len(u)) auc = np.trapezoid(distances + np.abs(med), x) return auc
[docs] def median_of_abs(u, v): """Median of absolute values: ``median(abs(u))``.""" return np.median(np.abs(u))
[docs] def full_sum(u, v): """Sum of absolute values: ``sum(abs(u))``.""" return np.sum(np.abs(u))
ALL_TESTS = { 'Mean': mean, 'Median': median, 'Kolmogorov-Smirnov': kolmogorov_smirnov, 'Cramer von Mises': cramer_von_mises, 'Flattening': flattening, 'Median of Abs': median_of_abs, }