Source code for jesterTOV.inference.flows.bilby_extract

"""Extract GW posterior samples from bilby result HDF5 files.

This module provides utilities to read bilby result files and extract
the parameters needed for jester's GW likelihood: ``mass_1_source``,
``mass_2_source``, ``lambda_1``, and ``lambda_2``.

Bilby serialises its ``Result`` object to HDF5 using a recursive
dict-to-group mapping (``recursively_save_dict_contents_to_group``).
The ``posterior`` attribute — a pandas DataFrame
of reweighted samples — is stored as an HDF5 group with one dataset per
parameter column.  This is the canonical source for derived parameters such
as ``mass_1_source`` and ``lambda_1``.

Note: the file also contains a ``samples`` dataset (raw nested-sampling live
points) and ``search_parameter_keys``, but those are unweighted sampler outputs
and do not include derived quantities.  This module reads only the ``posterior``
group.

No bilby installation is required.
"""

import argparse
from pathlib import Path

import numpy as np

from jesterTOV.logging_config import get_logger


logger = get_logger("jester")

# Posterior columns that must be present in the bilby result file and that are
# written to the final .npz output.  These are the parameters needed for
# jester's GW likelihood; extract_gw_posterior_from_bilby raises KeyError if
# any of them is missing from the posterior group.
_REQUIRED_OUTPUT_PARAMS: list[str] = [
    "mass_1_source",
    "mass_2_source",
    "lambda_1",
    "lambda_2",
]


# ---------------------------------------------------------------------------
# HDF5 reading
# ---------------------------------------------------------------------------


def _read_bilby_hdf5(filepath: str) -> dict[str, np.ndarray]:
    """Load every column of the ``posterior`` group of a bilby HDF5 result.

    The file is opened read-only and each member of the top-level
    ``posterior`` group (``f["posterior"]["<param>"]``) is copied into a
    NumPy array, so the returned data stays valid after the file is closed.
    This group-per-column layout is what bilby's
    ``recursively_save_dict_contents_to_group`` writes since bilby 1.1.0.

    Parameters
    ----------
    filepath : str
        Location of the bilby result ``.hdf5`` file.

    Returns
    -------
    dict[str, np.ndarray]
        One entry per member of the ``posterior`` group, keyed by the
        parameter name.

    Raises
    ------
    ValueError
        If the file has no top-level ``posterior`` entry, or that entry is
        not an HDF5 group.
    """
    # Imported lazily so that importing this module does not require h5py.
    import h5py

    with h5py.File(filepath, "r") as handle:
        has_posterior_group = "posterior" in handle and isinstance(
            handle["posterior"], h5py.Group
        )
        if not has_posterior_group:
            raise ValueError(
                f"No 'posterior' group found in '{filepath}'. "
                "Expected a bilby result file saved with bilby >= 1.1.0. "
                f"Available top-level keys: {list(handle.keys())}"
            )

        group = handle["posterior"]
        # Narrow the type for static checkers; the guard above already
        # guarantees this at runtime.
        assert isinstance(group, h5py.Group)

        # Materialise each dataset while the file is still open.
        columns: dict[str, np.ndarray] = {}
        for name in group.keys():
            columns[name] = np.array(group[name])  # type: ignore[index]
        return columns


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------



[docs]
def extract_gw_posterior_from_bilby(
    bilby_result_file: str,
    output_file: str | None = None,
) -> str:
    """Extract GW posterior samples from a bilby result HDF5 file.

    Reads ``mass_1_source``, ``mass_2_source``, ``lambda_1``, and
    ``lambda_2`` from a bilby result file and saves them as a ``.npz`` file
    suitable for use with :class:`~jesterTOV.inference.flows.config.FlowTrainingConfig`.

    All four parameters must be present in the bilby result file.  Bilby
    writes them directly for BNS analyses, so no parameter conversion is
    performed here.

    Parameters
    ----------
    bilby_result_file : str
        Path to bilby result ``.hdf5`` file.
    output_file : str | None
        Output ``.npz`` path.  Defaults to the same directory as the input
        with a ``_gw_jester_posterior.npz`` suffix appended to the stem.
        If the given path does not end in ``.npz``, the extension is
        appended (this is where :func:`numpy.savez` writes the file anyway).

    Returns
    -------
    str
        Path to the saved ``.npz`` file, including the ``.npz`` extension.

    Raises
    ------
    KeyError
        If a required parameter is absent from the bilby result file
        (including the case of an empty ``posterior`` group).
    ValueError
        If the HDF5 file does not contain a ``posterior`` group.
    """
    bilby_result_file = str(bilby_result_file)

    # Determine the output path
    if output_file is None:
        source_path = Path(bilby_result_file)
        output_file = str(
            source_path.parent / f"{source_path.stem}_gw_jester_posterior.npz"
        )
    else:
        output_file = str(output_file)
        # np.savez silently appends ".npz" when the name lacks it.  Apply the
        # same rule up front so the logged/returned path is the file that is
        # actually written.
        if not output_file.endswith(".npz"):
            output_file += ".npz"

    logger.info(f"Reading bilby result from {bilby_result_file}")
    params = _read_bilby_hdf5(bilby_result_file)
    logger.info(f"Available parameters: {sorted(params.keys())}")

    # Validate required output parameters before touching any values, so an
    # empty or incomplete posterior group yields the documented KeyError.
    for key in _REQUIRED_OUTPUT_PARAMS:
        if key not in params:
            raise KeyError(
                f"Required parameter '{key}' not found in bilby result file "
                f"'{bilby_result_file}'. "
                f"Available parameters: {sorted(params.keys())}"
            )

    # Count samples from a column that is now known to exist.
    logger.info(f"Found {len(params[_REQUIRED_OUTPUT_PARAMS[0]])} posterior samples")

    # Ensure output directory exists
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # Save NPZ with exactly the required keys (same list as validated above)
    np.savez(
        output_file,
        **{key: params[key] for key in _REQUIRED_OUTPUT_PARAMS},
    )

    logger.info(f"Saved GW posterior samples to {output_file}")
    return output_file



# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------


def main() -> None:
    """Run the ``jester_extract_gw_posterior_bilby`` command-line tool.

    Parses the command line (one positional input file and an optional
    ``--output`` path), forwards both to
    :func:`extract_gw_posterior_from_bilby`, and logs the path it returns.

    Usage::

        jester_extract_gw_posterior_bilby result.hdf5 [--output out.npz]
    """
    tool_description = (
        "Extract mass_1_source, mass_2_source, lambda_1, lambda_2 from a "
        "bilby result HDF5 file and save them as a .npz file for use with "
        "jester's GW flow training pipeline."
    )
    output_help = (
        "Output .npz file path.  Defaults to the same directory as the "
        "input with '_gw_jester_posterior.npz' appended to the stem."
    )

    cli = argparse.ArgumentParser(
        prog="jester_extract_gw_posterior_bilby",
        description=tool_description,
    )
    cli.add_argument(
        "bilby_result_file", type=str, help="Path to bilby HDF5 result file"
    )
    # Leaving --output unset (None) lets the extractor pick its default path.
    cli.add_argument("--output", type=str, default=None, help=output_help)

    options = cli.parse_args()
    saved_to = extract_gw_posterior_from_bilby(
        bilby_result_file=options.bilby_result_file,
        output_file=options.output,
    )
    logger.info(f"Saved: {saved_to}")