# Source code for jesterTOV.inference.flows.bilby_extract
"""Extract GW posterior samples from bilby result HDF5 files.
This module provides utilities to read bilby result files and extract
the parameters needed for jester's GW likelihood: ``mass_1_source``,
``mass_2_source``, ``lambda_1``, and ``lambda_2``.
Bilby serialises its ``Result`` object to HDF5 using a recursive
dict-to-group mapping (``recursively_save_dict_contents_to_group``).
The ``posterior`` attribute — a pandas DataFrame
of reweighted samples — is stored as an HDF5 group with one dataset per
parameter column. This is the canonical source for derived parameters such
as ``mass_1_source`` and ``lambda_1``.
Note: the file also contains a ``samples`` dataset (raw nested-sampling live
points) and ``search_parameter_keys``, but those are unweighted sampler outputs
and do not include derived quantities. This module reads only the ``posterior``
group.
No bilby installation is required.
"""
import argparse
from pathlib import Path
import numpy as np
from jesterTOV.logging_config import get_logger
logger = get_logger("jester")
# Parameters required in the final .npz output. These are the four names the
# module docstring lists as needed for jester's GW likelihood.
# NOTE(review): the code that consumes this list is not visible in this part
# of the file — confirm how missing entries are handled there.
_REQUIRED_OUTPUT_PARAMS: list[str] = [
"mass_1_source",
"mass_2_source",
"lambda_1",
"lambda_2",
]
# ---------------------------------------------------------------------------
# HDF5 reading
# ---------------------------------------------------------------------------
def _read_bilby_hdf5(filepath: str) -> dict[str, np.ndarray]:
    """Load every dataset of the ``posterior`` group of a bilby HDF5 result.

    The file is opened read-only and the top-level ``posterior`` entry is
    looked up. Each member of that group is converted with ``np.array`` and
    stored under its own name (``f["posterior"]["<param>"]``). According to
    the module notes, this group layout is what bilby >= 1.1.0 writes via
    ``recursively_save_dict_contents_to_group``.

    Parameters
    ----------
    filepath : str
        Path to the bilby result ``.hdf5`` file.

    Returns
    -------
    dict[str, np.ndarray]
        Mapping from parameter name to the array read from the matching
        dataset (expected to be a 1-D array of posterior samples).

    Raises
    ------
    ValueError
        If the file has no top-level ``posterior`` entry, or that entry is
        not an HDF5 group.
    """
    # h5py is imported at call time rather than at module import time.
    import h5py

    with h5py.File(filepath, "r") as f:
        # One lookup; ``None`` stands in for a missing entry so that a single
        # isinstance check covers both "absent" and "present but not a group".
        posterior = f["posterior"] if "posterior" in f else None
        if not isinstance(posterior, h5py.Group):
            raise ValueError(
                f"No 'posterior' group found in '{filepath}'. "
                "Expected a bilby result file saved with bilby >= 1.1.0. "
                f"Available top-level keys: {list(f.keys())}"
            )

        # Materialise the arrays while the file is still open.
        samples: dict[str, np.ndarray] = {}
        for name in posterior.keys():
            samples[name] = np.array(posterior[name])
        return samples
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
def main() -> None:
    """Run the ``jester_extract_gw_posterior_bilby`` command-line tool.

    Parses the positional bilby result path and the optional ``--output``
    path from the command line, forwards both to
    ``extract_gw_posterior_from_bilby``, and logs the path it returns.
    When ``--output`` is omitted, ``None`` is forwarded and the extraction
    function is left to pick the destination.

    Usage::

        jester_extract_gw_posterior_bilby result.hdf5 [--output out.npz]
    """
    description = (
        "Extract mass_1_source, mass_2_source, lambda_1, lambda_2 from a "
        "bilby result HDF5 file and save them as a .npz file for use with "
        "jester's GW flow training pipeline."
    )
    output_help = (
        "Output .npz file path. Defaults to the same directory as the "
        "input with '_gw_jester_posterior.npz' appended to the stem."
    )

    parser = argparse.ArgumentParser(
        prog="jester_extract_gw_posterior_bilby",
        description=description,
    )
    parser.add_argument(
        "bilby_result_file", type=str, help="Path to bilby HDF5 result file"
    )
    parser.add_argument("--output", type=str, default=None, help=output_help)

    args = parser.parse_args()
    output_path = extract_gw_posterior_from_bilby(
        bilby_result_file=args.bilby_result_file,
        output_file=args.output,
    )
    logger.info(f"Saved: {output_path}")