Source code for epydemix.model.simulation_results

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd

from .simulation_output import Trajectory


[docs] @dataclass class SimulationResults: """ Class to store and manage multiple simulation results. Attributes: trajectories (List[Trajectory]): List of simulation trajectories parameters (Dict[str, Any]): Dictionary of parameters used in the simulations """ trajectories: List[Trajectory] parameters: Dict[str, Any] @property def Nsim(self) -> int: """Number of simulations.""" return len(self.trajectories) @property def dates(self) -> List[pd.Timestamp]: """Simulation dates.""" return self.trajectories[0].dates if self.trajectories else [] @property def compartment_idx(self) -> Dict[str, int]: """Compartment indices.""" return self.trajectories[0].compartment_idx if self.trajectories else {}
[docs] def get_stacked_compartments( self, variables: Optional[List[str]] = None ) -> Dict[str, np.ndarray]: """ Get trajectories stacked into arrays of shape (Nsim, timesteps). Args: variables: List of compartment names to include. If None, all compartments are included. """ if not self.trajectories: return {} keys = variables if variables else self.trajectories[0].compartments.keys() return { comp_name: np.stack( [t.compartments[comp_name] for t in self.trajectories], axis=0 ) for comp_name in keys if comp_name in self.trajectories[0].compartments }
[docs] def get_stacked_transitions( self, variables: Optional[List[str]] = None ) -> Dict[str, np.ndarray]: """ Get trajectories stacked into arrays of shape (Nsim, timesteps). Args: variables: List of transition names to include. If None, all transitions are included. """ if not self.trajectories: return {} keys = variables if variables else self.trajectories[0].transitions.keys() return { trans_name: np.stack( [t.transitions[trans_name] for t in self.trajectories], axis=0 ) for trans_name in keys if trans_name in self.trajectories[0].transitions }
[docs] def get_quantiles( self, stacked: Dict[str, np.ndarray], quantiles: Optional[List[float]] = None, ignore_nan: bool = False, ) -> pd.DataFrame: """ Compute quantiles across all trajectories. Args: stacked: Dictionary of stacked trajectory arrays quantiles: List of quantile values to compute. If None, defaults to [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975] ignore_nan: If True, use np.nanquantile to ignore NaN values. Defaults to False. When enabled, a warning is issued if any time point has >50% NaN values, as quantiles may be unreliable with small sample sizes. """ if quantiles is None: quantiles = [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975] # Create dates and quantiles first (these will be the same for all compartments) dates = [] quantile_values = [] for q in quantiles: dates.extend(self.dates) quantile_values.extend([q] * len(self.dates)) # Initialize data dictionary with dates and quantiles data = {"date": dates, "quantile": quantile_values} # Add data quantile_func = np.nanquantile if ignore_nan else np.quantile # Check for high NaN proportions when ignore_nan is enabled if ignore_nan: import warnings for comp_name, comp_data in stacked.items(): nan_prop = np.isnan(comp_data).mean(axis=0) max_nan_prop = np.max(nan_prop) if max_nan_prop > 0.5: warnings.warn( f"Variable '{comp_name}' has time points with up to {max_nan_prop:.1%} NaN values. " f"Quantiles at these time points may be unreliable due to small sample size." ) for comp_name, comp_data in stacked.items(): comp_quantiles = [] for q in quantiles: quant_values = quantile_func(comp_data, q, axis=0) comp_quantiles.extend(quant_values) data[comp_name] = comp_quantiles return pd.DataFrame(data)
[docs] def get_quantiles_transitions( self, quantiles: Optional[List[float]] = None, ignore_nan: bool = False, variables: Optional[List[str]] = None, ) -> pd.DataFrame: """ Compute quantiles across all trajectories for transitions. The name of the columns are the transitions names and the demographic groups, in the following format: `{source_compartment_name}_to_{target_compartment_name}_{demographic_group}`. For example, the column `S_to_I_total` contains the quantiles of the number of individuals transitioning from susceptible ("S") to infected ("I") individuals across all demographic groups ("total"). Args: quantiles: List of quantile values to compute. If None, defaults to [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975] ignore_nan: If True, use np.nanquantile to ignore NaN values. Defaults to False. variables: List of transition names to include. If None, all transitions are included. """ stacked = self.get_stacked_transitions(variables=variables) return self.get_quantiles(stacked, quantiles, ignore_nan)
[docs] def get_quantiles_compartments( self, quantiles: Optional[List[float]] = None, ignore_nan: bool = False, variables: Optional[List[str]] = None, ) -> pd.DataFrame: """ Compute quantiles across all trajectories for compartments. The name of the columns are the compartments names and the demographic groups, in the following format: `{compartment_name}_{demographic_group}`. For example, the column `I_total` contains the quantiles of the number of infected ("I") individuals across all demographic groups ("total"). Args: quantiles: List of quantile values to compute. If None, defaults to [0.025, 0.05, 0.25, 0.5, 0.75, 0.95, 0.975] ignore_nan: If True, use np.nanquantile to ignore NaN values. Defaults to False. variables: List of compartment names to include. If None, all compartments are included. """ stacked = self.get_stacked_compartments(variables=variables) return self.get_quantiles(stacked, quantiles, ignore_nan)
[docs] def resample( self, freq: str, method: str = "last", fill_method: str = "ffill" ) -> "SimulationResults": """Resample all trajectories to new frequency.""" return SimulationResults( trajectories=[ t.resample(freq, method, fill_method) for t in self.trajectories ], parameters=self.parameters, )