Source code for calibrain.run_manifest
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence
from calibrain.calibration_dataset import PosteriorSummary
[docs]
@dataclass(frozen=True)
class ManifestRow:
    """One manifest CSV row: where its summary lives plus its metadata.

    Instances are frozen, so the two attributes cannot be rebound after
    construction (the ``metadata`` dict itself remains a regular dict).
    """

    # Location of the posterior summary referenced by the row.
    summary_path: Path
    # Column name -> value mapping for the row.
    metadata: Dict[str, Any]
def _as_int(value: str | None) -> Optional[int]:
if value is None:
return None
value = value.strip()
if value == "":
return None
try:
return int(float(value))
except ValueError:
return None
def _as_float(value: str | None) -> Optional[float]:
if value is None:
return None
value = value.strip()
if value == "":
return None
try:
return float(value)
except ValueError:
return None
def _as_str(value: str | None) -> Optional[str]:
if value is None:
return None
value = value.strip()
return value or None
def _coerce_manifest_metadata(row: Mapping[str, str]) -> Dict[str, Any]:
    """Turn one raw manifest row into a metadata dict with typed known columns.

    Known columns that are present in ``row`` are emitted first, in the fixed
    order of the table below, each passed through its coercer (``None`` in
    the table means the value is copied unchanged).  All remaining columns of
    ``row`` follow in row order with their original values.
    """
    # (column, coercer) pairs for the fields the calibration/aggregation
    # pipeline filters or groups on.
    known_columns = (
        ("global_run_id", _as_int),
        ("run_id", _as_int),
        ("seed", _as_int),
        ("nnz", _as_int),
        ("alpha_SNR", _as_float),
        ("subject", _as_str),
        ("orientation_type", _as_str),
        ("coil_type", _as_int),
        ("sensor_kind", None),
        ("n_sources", _as_int),
        ("n_times", _as_int),
        ("solver", _as_str),
        ("noise_type", _as_str),
        ("posterior_summary", None),
    )

    coerced: Dict[str, Any] = {}
    for column, convert in known_columns:
        if column in row:
            cell = row.get(column)
            coerced[column] = cell if convert is None else convert(cell)

    # Anything not handled above is carried over untouched.
    for column, cell in row.items():
        if column not in coerced:
            coerced[column] = cell
    return coerced
[docs]
def load_manifest_csv(path: str | Path) -> List[ManifestRow]:
    """Read a manifest CSV and return one ``ManifestRow`` per usable row.

    The summary location is taken from the first of the columns
    ``posterior_summary``, ``summary_path``, ``posterior_path`` that holds
    non-blank text.  Rows where none of them does are skipped.  The path is
    used exactly as written in the CSV.

    Args:
        path: Location of the manifest CSV file (read as UTF-8).

    Returns:
        The rows that reference a posterior summary, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the file has no header row.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Manifest CSV not found: {path}")

    rows: List[ManifestRow] = []
    with path.open("r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle)
        if reader.fieldnames is None:
            raise ValueError(f"Manifest CSV has no header row: {path}")
        for raw in reader:
            # Normalise each candidate before testing it: a whitespace-only
            # cell is truthy as raw text and would otherwise hide a valid
            # value in one of the fallback columns.
            summary_str = None
            for column in ("posterior_summary", "summary_path", "posterior_path"):
                summary_str = _as_str(raw.get(column))
                if summary_str:
                    break
            if not summary_str:
                # Skip rows that do not point to a posterior summary file.
                continue
            rows.append(
                ManifestRow(
                    summary_path=Path(summary_str),
                    metadata=_coerce_manifest_metadata(raw),
                )
            )
    return rows
[docs]
def summaries_from_manifest(path: str | Path) -> List[PosteriorSummary]:
    """Load a manifest CSV and wrap each of its rows in a ``PosteriorSummary``.

    Each summary is built from the row's ``summary_path`` and ``metadata``;
    the result keeps the order returned by ``load_manifest_csv``.
    """
    return [
        PosteriorSummary(path=entry.summary_path, metadata=entry.metadata)
        for entry in load_manifest_csv(path)
    ]