Skip to content

writers #

WRITERS:

Output functions for run-fit

Functions:

  • write_scatter

    Predict and save energies/forces to HDF5.

  • report

    Report training progress for Levenberg-Marquardt optimizer.

write_scatter #

write_scatter(
    dataset: Dataset,
    force_field: TensorForceField,
    topology_in: TensorTopology,
    device_type: str,
    filename: PathLike,
) -> tuple[float, float, float, float]

Predict and save energies/forces to HDF5.

Source code in presto/writers.py
def write_scatter(
    dataset: datasets.Dataset,
    force_field: smee.TensorForceField,
    topology_in: smee.TensorTopology,
    device_type: str,
    filename: PathLike,
) -> tuple[float, float, float, float]:
    """Predict energies/forces, save them to HDF5, and return diff statistics.

    Args:
        dataset: Dataset of conformers to predict for.
        force_field: Force field used for prediction.
        topology_in: Topology mapped to the dataset's (single) SMILES.
        device_type: Device to run prediction on.
        filename: Destination HDF5 file (overwritten).

    Returns:
        Mean and std of the energy differences, then mean and std of the
        force differences (predicted minus reference).
    """
    energy_ref, energy_pred, forces_ref, forces_pred, *_ = predict_with_weights(
        dataset,
        force_field,
        {dataset[0]["smiles"]: topology_in},
        device_type=device_type,
        normalize=False,
    )

    with torch.no_grad():
        delta_energy = energy_pred - energy_ref
        delta_forces = forces_pred - forces_ref
        n_conformers = len(energy_ref)

        # Persist reference/predicted/difference arrays for later analysis.
        with h5py.File(filename, "w") as f:
            for key, tensor in (
                ("energy_reference", energy_ref),
                ("energy_predicted", energy_pred),
                ("energy_differences", delta_energy),
                ("forces_reference", forces_ref),
                ("forces_predicted", forces_pred),
                ("forces_differences", delta_forces),
            ):
                f.create_dataset(key, data=tensor.cpu().numpy())

            f.attrs["n_conformers"] = n_conformers
            # Forces appear flattened over conformers; recover atoms per
            # conformer from the leading dimension — assumes an even split.
            f.attrs["n_atoms"] = forces_ref.shape[0] // n_conformers

        return (
            torch.mean(delta_energy).item(),
            torch.std(delta_energy).item(),
            torch.mean(delta_forces).item(),
            torch.std(delta_forces).item(),
        )

report #

report(
    step: int,
    x: Tensor,
    loss: Tensor,
    gradient: Tensor,
    hessian: Tensor,
    step_quality: float,
    accept_step: bool,
    trainable: Trainable,
    topologies: list[TensorTopology],
    datasets_train: list[Dataset],
    datasets_test: list[Dataset],
    initial_parameters: Tensor,
    regularisation_target: Literal["initial", "zero"],
    metrics_file: PathLike,
    experiment_dir: Path,
) -> None

Report training progress for Levenberg-Marquardt optimizer.

This function computes training and test losses using the same loss computation as train_adam (via compute_overall_loss_and_grad) to ensure consistent metrics reporting.

Args:

  • step: Current optimization step.
  • x: Current trainable parameters.
  • loss: Loss value from the optimizer (not used; recomputed for consistency).
  • gradient: Gradient tensor (not used in reporting).
  • hessian: Hessian tensor (not used in reporting).
  • step_quality: Quality measure of the step (not used in reporting).
  • accept_step: Whether the step was accepted (not used in reporting).
  • trainable: The trainable object containing the force field.
  • topologies: List of topologies for all molecules.
  • datasets_train: List of training datasets for all molecules.
  • datasets_test: List of test datasets for all molecules.
  • initial_parameters: Initial parameters for regularization.
  • regularisation_target: Regularization target ('initial' or 'zero').
  • metrics_file: Path to the metrics output file.
  • experiment_dir: Path to the TensorBoard experiment directory.

Source code in presto/writers.py
def report(
    step: int,
    x: torch.Tensor,
    loss: torch.Tensor,
    gradient: torch.Tensor,
    hessian: torch.Tensor,
    step_quality: float,
    accept_step: bool,
    trainable: descent.train.Trainable,
    topologies: list[smee.TensorTopology],
    datasets_train: list[datasets.Dataset],
    datasets_test: list[datasets.Dataset],
    initial_parameters: torch.Tensor,
    regularisation_target: typing.Literal["initial", "zero"],
    metrics_file: PathLike,
    experiment_dir: pathlib.Path,
) -> None:
    """Report training progress for Levenberg-Marquardt optimizer.

    Training and test losses are recomputed with the same loss function used
    by train_adam (compute_overall_loss_and_grad) so the reported metrics are
    directly comparable between the two optimizers.

    Args:
        step: Current optimization step.
        x: Current trainable parameters.
        loss: Loss value from the optimizer (not used; recomputed for consistency).
        gradient: Gradient tensor (not used in reporting).
        hessian: Hessian tensor (not used in reporting).
        step_quality: Quality measure of the step (not used in reporting).
        accept_step: Whether the step was accepted (not used in reporting).
        trainable: The trainable object containing the force field.
        topologies: List of topologies for all molecules.
        datasets_train: List of training datasets for all molecules.
        datasets_test: List of test datasets for all molecules.
        initial_parameters: Initial parameters for regularization.
        regularisation_target: Regularization target ('initial' or 'zero').
        metrics_file: Path to the metrics output file.
        experiment_dir: Path to the TensorBoard experiment directory.
    """
    with torch.enable_grad():  # type: ignore[no-untyped-call]
        # Evaluate train and test splits identically; gradients are not needed
        # for reporting, only the scalar losses.
        split_losses = []
        for split in (datasets_train, datasets_test):
            split_loss, _ = compute_overall_loss_and_grad(
                split,
                trainable,
                x,
                initial_parameters,
                topologies,
                regularisation_target,
                x.device.type,
                compute_grad=False,
            )
            split_losses.append(split_loss)
        loss_train, loss_test = split_losses

        with open_writer(experiment_dir) as writer, open(metrics_file, "a") as f:
            write_metrics(step, loss_train, loss_test, writer, f)