Source code for calibrain.run_manifest

from __future__ import annotations

import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence

from calibrain.calibration_dataset import PosteriorSummary


@dataclass(frozen=True)
class ManifestRow:
    """One manifest CSV row, exposed through typed attributes.

    The instance is frozen, so its attributes cannot be rebound after
    construction. The ``metadata`` dict itself remains a regular mutable
    ``dict``.

    Attributes:
        summary_path: Path taken from the row's summary-file column.
        metadata: Column name -> value mapping built from the same row.
    """

    summary_path: Path
    metadata: Dict[str, Any]
def _as_int(value: str | None) -> Optional[int]:
    """Parse a CSV cell as an integer, returning ``None`` when it cannot be.

    Args:
        value: Raw cell text, or ``None`` for a missing cell.

    Returns:
        The parsed integer. Float-formatted text such as ``"3.0"`` or
        ``"1e3"`` is accepted and truncated toward zero. ``None`` is
        returned for missing, blank, non-numeric, infinite or NaN input.
    """
    if value is None:
        return None
    text = value.strip()
    if not text:
        return None
    try:
        # Parse integer literals directly: going through float() would
        # silently round values above 2**53 (e.g. large seeds or run ids).
        return int(text)
    except ValueError:
        pass
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        # ValueError: non-numeric text or "nan".
        # OverflowError: "inf" / "-inf", which float() accepts but int() rejects.
        return None


def _as_float(value: str | None) -> Optional[float]:
    """Parse a CSV cell as a float, returning ``None`` when it cannot be.

    Args:
        value: Raw cell text, or ``None`` for a missing cell.

    Returns:
        The parsed float, or ``None`` for missing, blank or non-numeric input.
    """
    if value is None:
        return None
    text = value.strip()
    if not text:
        return None
    try:
        return float(text)
    except ValueError:
        return None


def _as_str(value: str | None) -> Optional[str]:
    """Strip a CSV cell and map missing or blank text to ``None``.

    Args:
        value: Raw cell text, or ``None`` for a missing cell.

    Returns:
        The stripped text, or ``None`` if nothing remains after stripping.
    """
    if value is None:
        return None
    return value.strip() or None


def _coerce_manifest_metadata(row: Mapping[str, str]) -> Dict[str, Any]:
    """Build a metadata dict from one manifest row, coercing known columns.

    Known columns that are present in ``row`` are emitted first, in a fixed
    order, with integer, float and string columns converted by ``_as_int``,
    ``_as_float`` and ``_as_str`` respectively. ``sensor_kind`` and
    ``posterior_summary`` are known columns that are copied without
    conversion. Every remaining column of ``row`` follows, in the row's own
    order, with its value untouched.

    Args:
        row: Column name -> raw cell text for a single manifest row.

    Returns:
        A new dict holding the coerced known columns followed by the
        remaining columns.
    """
    int_keys = (
        "global_run_id",
        "run_id",
        "seed",
        "nnz",
        "coil_type",
        "n_sources",
        "n_times",
    )
    float_keys = ("alpha_SNR",)
    str_keys = ("subject", "orientation_type", "solver", "noise_type")
    # Fixed output order for the known columns; this order is part of the
    # returned dict's iteration order.
    known_keys = (
        "global_run_id",
        "run_id",
        "seed",
        "nnz",
        "alpha_SNR",
        "subject",
        "orientation_type",
        "coil_type",
        "sensor_kind",
        "n_sources",
        "n_times",
        "solver",
        "noise_type",
        "posterior_summary",
    )
    converters = {
        **dict.fromkeys(int_keys, _as_int),
        **dict.fromkeys(float_keys, _as_float),
        **dict.fromkeys(str_keys, _as_str),
    }

    meta: Dict[str, Any] = {}
    for key in known_keys:
        if key not in row:
            continue
        raw = row.get(key)
        convert = converters.get(key)
        # Known columns without a converter are passed through as-is.
        meta[key] = raw if convert is None else convert(raw)

    # Carry over every other column unchanged.
    for key, value in row.items():
        if key not in meta:
            meta[key] = value
    return meta
def load_manifest_csv(path: str | Path) -> List[ManifestRow]:
    """Read a manifest CSV and return one ``ManifestRow`` per usable row.

    A row is usable when one of the columns ``posterior_summary``,
    ``summary_path`` or ``posterior_path`` (checked in that order) holds
    non-blank text; that text becomes the row's ``summary_path``. Rows with
    no such value are skipped. The summary path is used exactly as written
    in the CSV and is not resolved against the manifest's location.

    Args:
        path: Location of the manifest CSV file.

    Returns:
        The usable rows, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file has no header row.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Manifest CSV not found: {path}")

    summary_columns = ("posterior_summary", "summary_path", "posterior_path")
    rows: List[ManifestRow] = []
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise be glued onto the first header name and make that
    # column unrecognisable.
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        if reader.fieldnames is None:
            raise ValueError(f"Manifest CSV has no header row: {path}")
        for raw in reader:
            # Normalise each candidate before falling through, so that a
            # whitespace-only cell does not shadow a later column that
            # actually holds the path.
            summary_str = None
            for column in summary_columns:
                summary_str = _as_str(raw.get(column))
                if summary_str:
                    break
            if not summary_str:
                # Skip rows that do not point to a posterior summary file.
                continue
            meta = _coerce_manifest_metadata(raw)
            rows.append(ManifestRow(summary_path=Path(summary_str), metadata=meta))
    return rows
def summaries_from_manifest(path: str | Path) -> List[PosteriorSummary]:
    """Load a manifest CSV and wrap each row in a ``PosteriorSummary``.

    Args:
        path: Location of the manifest CSV file, passed to
            ``load_manifest_csv``.

    Returns:
        One ``PosteriorSummary`` per row returned by ``load_manifest_csv``,
        in the same order, built from that row's ``summary_path`` and
        ``metadata``.
    """
    return [
        PosteriorSummary(path=entry.summary_path, metadata=entry.metadata)
        for entry in load_manifest_csv(path)
    ]