Source code for swiftsimio.metadata.objects

"""
Handle the metadata in SWIFTsimIO files.

There is a main abstract class, ``SWIFTMetadata``, that contains the required base
methods to correctly represent the internal representation of an HDF5 file to what
SWIFTsimIO expects to be able to unpack into the object notation (e.g.
PartType0/Coordinates -> gas.coordinates).
"""

import numpy as np
import h5py
import unyt
from unyt.array import _iterable

from swiftsimio.conversions import swift_cosmology_to_astropy
from swiftsimio import metadata
from swiftsimio.objects import cosmo_array, cosmo_factor, cosmo_quantity
from swiftsimio._handle_provider import HandleProvider
from abc import ABC, abstractmethod

import re
import warnings

from datetime import datetime
from pathlib import Path
from typing import Callable


def _convert_snake_to_camel(name: str) -> str:
    """
    Place underscore between words and make all lower case.

    Parameters
    ----------
    name : str
        Name in CamelCase.

    Returns
    -------
    str
        Converted name in snake_case.
    """
    # regular expression for camel case to snake case
    # https://stackoverflow.com/a/1176023
    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()


class SWIFTMetadata(HandleProvider, ABC):
    """
    An abstract base class for all SWIFT-related file metadata.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    units : SWIFTUnits
        Units object to use.

    handle : h5py.File
        File handle to read metadata from.
    """

    # Underlying path to the file that this metadata is associated with.
    filename: Path
    # The units object associated with this file. All SWIFT metadata objects
    # must use this units system.
    units: "SWIFTUnits"
    # The header dictionary which will later be unpackaged according to the
    # metadata fields.
    header: dict
    # Whether this type of file can be masked or not (this is a fixed parameter
    # that should probably not be changed at run-time).
    masking_valid: bool = False
    # Whether this file uses shared metadata cell counts for all particle types
    # (as is the case in SOAP) or whether each type (e.g. Gas, Dark Matter, etc.)
    # has its own top-level cell grid counts.
    shared_cell_counts: str | None = None
    # Whether all the arrays in this files have the same length and order (as is
    # the case for SOAP, all arrays correspond to subhalos) or whether there are
    # multiple types (e.g. Gas, Dark Matter, etc.). Allows you to use constrain_index
    # in masking as everyone uses the same _shared mask!
    homogeneous_arrays: bool = False
    # Whether per-group HDF5 attributes should be exposed on group dataset objects.
    expose_group_attributes: bool = False
    # The type of file, e.g. FullVolume, VolumeSubset, SOAP, FOF, etc.
    output_type: str
    # The size of the simulation volume in each dimension.
    boxsize: cosmo_array

    def __init__(
        self,
        filename: Path,
        units: "SWIFTUnits | None" = None,
        handle: h5py.File | None = None,
    ) -> None:
        super().__init__(filename, handle=handle)
        if units is not None:
            self.units = units
        else:
            self.units = SWIFTUnits(filename, handle=self.handle)

        # don't _close_handle_if_manager here in the ABC, let derived classes close

        return

    def load_groups(self) -> None:
        """
        Load the groups and metadata into objects.

        These are called:

            metadata.<group_name>_properties

        This contains eight arrays,

            metadata.<type>_properties.field_names
            metadata.<type>_properties.field_paths
            metadata.<type>_properties.field_units
            metadata.<type>_properties.field_cosmologies
            metadata.<type>_properties.field_descriptions
            metadata.<type>_properties.field_compressions
            metadata.<type>_properties.field_physicals
            metadata.<type>_properties.field_valid_transforms

        As well as some more information about the group.
        """
        for group, name in zip(self.present_groups, self.present_group_names):
            filetype_metadata = SWIFTGroupMetadata(
                self.filename,
                group=group,
                group_name=name,
                metadata=self,
                scale_factor=self.scale_factor,
                handle=self.handle,
            )
            setattr(self, f"{name}_properties", filetype_metadata)

        return

    def get_metadata(self) -> None:
        """Load the metadata as specified in metadata.metadata_fields."""
        for field, name in metadata.metadata_fields.metadata_fields_to_read.items():
            try:
                setattr(self, name, dict(self.handle[field].attrs))
            except KeyError:
                setattr(self, name, None)

        return

    def postprocess_header(self) -> None:
        """Do some minor postprocessing on the header to local variables."""
        # We need the scale factor to initialize `cosmo_array`s, so start with the float
        # items including the scale factor.
        # These must be unpacked as they are stored as length-1 arrays

        header_unpack_float_units = (
            metadata.metadata_fields.generate_units_header_unpack_single_float(
                mass=self.units.mass,
                length=self.units.length,
                time=self.units.time,
                current=self.units.current,
                temperature=self.units.temperature,
            )
        )
        for field, names in metadata.metadata_fields.header_unpack_single_float.items():
            try:
                if isinstance(names, list):
                    # Sometimes we store a list in case we have multiple names, for
                    # example Redshift -> metadata.redshift AND metadata.z. Can't just do
                    # the iteration because we may loop over the letters in the string.
                    for variable in names:
                        if variable in header_unpack_float_units.keys():
                            # We have an associated unit!
                            unit = header_unpack_float_units[variable]
                            setattr(
                                self,
                                variable,
                                unyt.unyt_quantity(self.header[field][0], units=unit),
                            )
                        else:
                            # No unit
                            setattr(self, variable, self.header[field][0])
                else:
                    # We can just check for the unit and set the attribute
                    variable = names
                    if variable in header_unpack_float_units.keys():
                        # We have an associated unit!
                        unit = header_unpack_float_units[variable]
                        setattr(
                            self,
                            variable,
                            unyt.unyt_quantity(self.header[field][0], units=unit),
                        )
                    else:
                        # No unit
                        setattr(self, variable, self.header[field][0])
            except KeyError:
                # Must not be present, just skip it
                continue
        # need the scale factor first for cosmology on other header attributes
        try:
            self.a = self.scale_factor
        except AttributeError:
            # These must always be present for the initialisation of cosmology properties
            self.a = 1.0
            self.scale_factor = 1.0

        # These are just read straight in to variables
        header_unpack_arrays_units = (
            metadata.metadata_fields.generate_units_header_unpack_arrays(
                mass=self.units.mass,
                length=self.units.length,
                time=self.units.time,
                current=self.units.current,
                temperature=self.units.temperature,
            )
        )
        header_unpack_arrays_cosmo_args = (
            metadata.metadata_fields.generate_cosmo_args_header_unpack_arrays(
                self.scale_factor
            )
        )

        for field, name in metadata.metadata_fields.header_unpack_arrays.items():
            try:
                if name in header_unpack_arrays_units.keys():
                    if name in header_unpack_arrays_cosmo_args.keys():
                        unpack_class = (
                            cosmo_array
                            if _iterable(self.header[field])
                            else cosmo_quantity
                        )
                        setattr(
                            self,
                            name,
                            unpack_class(
                                self.header[field],
                                units=header_unpack_arrays_units[name],
                                **header_unpack_arrays_cosmo_args[name],
                            ),
                        )
                    else:
                        setattr(
                            self,
                            name,
                            unyt.unyt_array(
                                self.header[field],
                                units=header_unpack_arrays_units[name],
                            ),
                        )
                    # This is required or we automatically get everything in CGS!
                    getattr(self, name).convert_to_units(
                        header_unpack_arrays_units[name]
                    )
                else:
                    # Must not have any units! Oh well.
                    setattr(self, name, self.header[field])
            except KeyError:
                # Must not be present, just skip it
                continue

        # Now unpack the 'mass table' type items:
        for field, name in metadata.metadata_fields.header_unpack_mass_tables.items():
            try:
                setattr(
                    self,
                    name,
                    MassTable(
                        base_mass_table=self.header[field], mass_units=self.units.mass
                    ),
                )
            except KeyError:
                setattr(
                    self,
                    name,
                    MassTable(
                        base_mass_table=np.zeros(
                            len(metadata.particle_types.particle_name_underscores)
                        ),
                        mass_units=self.units.mass,
                    ),
                )

        # These must be unpacked as 'real' strings (i.e. converted to utf-8)

        for field, name in metadata.metadata_fields.header_unpack_string.items():
            try:
                # Deal with h5py's quirkiness that fixed-sized and variable-sized
                # strings are read as strings or bytes
                # See: https://github.com/h5py/h5py/issues/2172
                raw = self.header[field]
                try:
                    string = raw.decode("utf-8")
                except AttributeError:
                    string = raw
                setattr(self, name, string)
            except KeyError:
                # Must not be present, just skip it
                setattr(self, name, "")

        # These are special cases, sorry!
        # Date and time of snapshot dump
        try:
            try:
                # Try and decode bytes, otherwise save raw string
                snapshot_date = self.header.get(
                    "SnapshotDate", self.header.get("Snapshot date", b"")
                ).decode("utf-8")
            except AttributeError:
                snapshot_date = self.header.get(
                    "SnapshotDate", self.header.get("Snapshot date", "")
                )
            try:
                self.snapshot_date = datetime.strptime(
                    snapshot_date, "%H:%M:%S %Y-%m-%d %Z"
                )
            except ValueError:
                # Backwards compatibility; this was used previously due to simplicity
                # but is not portable between regions. So if you ran a simulation on
                # a British (en_GB) machine, and then tried to read on a Dutch
                # machine (nl_NL), this would _not_ work because %c is different.
                try:
                    self.snapshot_date = datetime.strptime(snapshot_date, "%c\n")
                except ValueError:
                    # Oh dear this has gone _very_wrong. Let's just keep it as a string.
                    self.snapshot_date = snapshot_date
        except KeyError:
            # Old file
            pass

        # get photon group edges RT dataset from the SubgridScheme group
        try:
            self.photon_group_edges = (
                self.handle["SubgridScheme/PhotonGroupEdges"][:] / self.units.time
            )
        except KeyError:
            self.photon_group_edges = None

        # get reduced speed of light RT dataset from the SubgridScheme group
        try:
            self.reduced_lightspeed = (
                self.handle["SubgridScheme/ReducedLightspeed"][0]
                * self.units.length
                / self.units.time
            )
        except KeyError:
            self.reduced_lightspeed = None

        # Store these separately as self.n_gas = number of gas particles for example
        for part_number, (_, part_name) in enumerate(
            metadata.particle_types.particle_name_underscores.items()
        ):
            try:
                setattr(self, f"n_{part_name}", self.num_part[part_number])
            except IndexError:
                # Backwards compatibility; mass/number table can change size.
                setattr(self, f"n_{part_name}", 0)

        # Need to unpack the gas gamma for cosmology
        try:
            self.gas_gamma = self.hydro_scheme["Adiabatic index"]
        except (KeyError, TypeError):
            # We can set a default and print a message whenever we require this value
            self.gas_gamma = None

        return

    def extract_cosmology(self) -> None:
        """
        Create an astropy.cosmology object from the internal cosmology system.

        This will be saved as ``self.cosmology``.
        """
        if self.cosmology_raw is not None:
            cosmo = self.cosmology_raw
        else:
            cosmo = {"Cosmological run": 0}

        if cosmo.get("Cosmological run", 0):
            self.cosmology = swift_cosmology_to_astropy(cosmo, units=self.units)
        else:
            self.cosmology = None

        return

    @property
    @abstractmethod
    def present_groups(self) -> list[str]:
        """
        Get the present groups.

        A property giving the present particle groups in the file to be unpackaged
        into top-level properties. For instance, in a regular snapshot, this would be
        ["PartType0", "PartType1", "PartType4", ...]. In SOAP, this would be
        ["SO/200_crit", "SO/200_mean", ...], i.e. one per aperture.

        Returns
        -------
        list[str]
            The list of present groups.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def present_group_names(self) -> list[str]:
        """
        Get the present group names.

        A property giving the mapping for the names in ``present_groups`` to what the
        objects are called on the SWIFTsimIO objects. For instance, in a regular snapshot,
        this would be ["gas", "dark_matter", "stars", ...]. In SOAP, this would be
        ["spherical_overdensity_200_crit", ...].

        Returns
        -------
        list[str]
            The list of present group names.
        """
        raise NotImplementedError

    @property
    def partial_snapshot(self) -> bool:
        """
        Check if this is a partial (e.g. a ``x.0.hdf5`` file).

        Returns
        -------
        bool
            ``True`` if the file is a partial file, else ``False``.
        """
        return False

    @staticmethod
    @abstractmethod
    def get_nice_name(group: str) -> str:
        """
        Convert the group name to a user-readable name.

        Parameters
        ----------
        group : str
            The group name as used in the hdf5 file.

        Returns
        -------
        str
            The user-readable version of the name.
        """
        raise NotImplementedError


class MassTable(object):
    """
    Extract a mass table to local variables based on the particle type names.

    Parameters
    ----------
    base_mass_table : np.array
        Mass table of the same length as the number of particle types.

    mass_units : unyt_quantity
        Base mass units for the simulation.
    """

    def __init__(
        self, base_mass_table: np.array, mass_units: unyt.unyt_quantity
    ) -> None:
        # TODO: Extract these names from the files themselves if possible.

        for index, name in enumerate(
            metadata.particle_types.particle_name_underscores.values()
        ):
            try:
                setattr(
                    self,
                    name,
                    unyt.unyt_quantity(base_mass_table[index], units=mass_units),
                )
            except IndexError:
                # Backwards compatible.
                setattr(self, name, None)

        return

    def __str__(self) -> str:
        """
        Print a description of the mass table.

        Returns
        -------
        str
            The mass table description.
        """
        return (
            "Mass table for "
            f"{' '.join(metadata.particle_types.particle_name_underscores.values())}"
        )

    def __repr__(self) -> str:
        """
        Print a description of the mass table.

        Returns
        -------
        str
            The mass table description.
        """
        return self.__str__()


class MappingTable(object):
    """
    Provide a table mapping from one named column instance to the other.

    Initially designed for the mapping between dust and elements.

    Parameters
    ----------
    data : np.ndarray
        The data array providing the mapping between the named
        columns. Should be of size N x M, where N is the number
        of elements in ``named_columns_x`` and M the number
        of elements in ``named_columns_y``.

    named_columns_x : list[str]
        The names of the columns in the first axis.

    named_columns_y : list[str]
        The names of the columns in the second axis.

    named_columns_x_name : str
        The name of the first mapping.

    named_columns_y_name : str
        The name of the second mapping.
    """

    def __init__(
        self,
        data: np.ndarray,
        named_columns_x: list[str],
        named_columns_y: list[str],
        named_columns_x_name: str,
        named_columns_y_name: str,
    ) -> None:
        self.data = data
        self.named_columns_x = named_columns_x
        self.named_columns_y = named_columns_y
        self.named_columns_x_name = named_columns_x_name
        self.named_columns_y_name = named_columns_y_name

        for x, name_x in enumerate(named_columns_x):
            for y, name_y in enumerate(named_columns_y):
                setattr(self, f"{name_x.lower()}_to_{name_y.lower()}", data[x][y])

        return

    def __str__(self) -> str:
        """
        Print a description of the mapping table.

        Returns
        -------
        str
            The mapping table description.
        """
        return (
            f"Mapping table from {self.named_columns_x_name} to "
            f"{self.named_columns_y_name}, containing {len(self.data)} "
            f"by {len(self.data[0])} elements."
        )

    def __repr__(self) -> str:
        """
        Print a description of the mapping table.

        Returns
        -------
        str
            The mapping table description.
        """
        return f"{self.__str__()}. Raw data: \n{self.data}."



[docs]
class SWIFTGroupMetadata(HandleProvider):
    """
    Provide the metadata for one hdf5 Group.

    This, for instance, could be ``PartType0``, or ``gas``. This will load in
    the names of all datasets, possible named fields, and present them for use in the
    actual i/o routines.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    group : str
        The name of the group in the hdf5 file.

    group_name : str
        The corresponding group name for swiftsimio.

    metadata : SWIFTMetadata
        The snapshot metadata.

    scale_factor : float
        The snapshot scale factor.

    handle : h5py.File
        File handle to read metadata from.
    """

    filename: Path

    def __init__(
        self,
        filename: Path,
        group: str,
        group_name: str,
        metadata: "SWIFTMetadata",
        scale_factor: float,
        handle: h5py.File,
    ) -> None:
        super().__init__(filename, handle=handle)
        self.group = group
        self.group_name = group_name
        self.metadata = metadata
        self.units = metadata.units
        self.scale_factor = scale_factor

        self.load_metadata()

        self._close_handle_if_manager()

        return

    def __str__(self) -> str:
        """
        Print a description of the metadata object.

        Returns
        -------
        str
            The description.
        """
        return f"Metadata class for {self.group} ({self.group_name})"

    def __repr__(self) -> str:
        """
        Print a description of the metadata object.

        Returns
        -------
        str
            The description.
        """
        return self.__str__()


[docs]
    def load_metadata(self) -> None:
        """
        Load the required metadata.

        This includes loading the field names and any custom named columns. Field-level
        data such as units and descriptions of individual datasets are not loaded here
        but instead lazy-loaded along with the data.
        """
        self.load_field_names()
        self.load_named_columns()
        if self.metadata.expose_group_attributes:
            self.load_group_attributes()
        else:
            self.group_attributes = {}

        return



[docs]
    def load_field_names(self) -> None:
        """Load in only the field names."""
        # Skip fields which are groups themselves
        self.field_paths = []
        self.field_names = []
        for item in self.handle[f"{self.group}"].keys():
            # Skip fields which are groups themselves
            if f"{self.group}/{item}" not in self.metadata.present_groups:
                self.field_paths.append(f"{self.group}/{item}")
                self.field_names.append(_convert_snake_to_camel(item))

        return



[docs]
    def load_named_columns(self) -> None:
        """Load the named column data for relevant fields."""
        named_columns = {}

        for field in self.field_paths:
            property_name = field.split("/")[-1]

            # Not all datasets have named columns
            named_columns_metadata = getattr(self.metadata, "named_columns", {})

            if property_name in named_columns_metadata.keys():
                field_names = self.metadata.named_columns[property_name]

                # Now need to make a decision on capitalisation. If we have a set of
                # words with only one capital in them, then it's likely that they are
                # element names or something similar, so they should be lower case.
                # If on average we have many more capitals, then they are likely to be
                # ionized fractions (e.g. HeII) and so we want to leave them with their
                # original capitalisation.

                def num_capitals(s: str) -> int:
                    """
                    Count the number of upper case letters in the string.

                    Parameters
                    ----------
                    s : str
                        The string to analyse.

                    Returns
                    -------
                    int
                        The number of upper case letters in the string.
                    """
                    return sum(1 for c in s if c.isupper())

                mean_num_capitals = sum(map(num_capitals, field_names)) / len(
                    field_names
                )

                if mean_num_capitals < 1.01:
                    # Decapitalise them as they are likely individual element names
                    formatted_field_names = [x.lower() for x in field_names]
                else:
                    formatted_field_names = field_names

                named_columns[field] = formatted_field_names
            else:
                named_columns[field] = None

        self.named_columns = named_columns

        return



[docs]
    def load_group_attributes(self) -> None:
        """
        Load group-level attribute names.

        Group attribute values are not read here; values are lazy-loaded alongside
        datasets in the reader. We only discover attribute names and public aliases.
        """
        attributes = {}
        for attribute_name in self.handle[self.group].attrs.keys():
            attributes[_convert_snake_to_camel(attribute_name)] = attribute_name
        self.group_attributes = attributes

        return





[docs]
class SWIFTUnits(HandleProvider):
    """
    Generate a :mod:`unyt` system that can be used with SWIFT data.

    These give the unit mass, length, time, current, and temperature as
    unyt unit variables in simulation units. I.e. you can take any value
    that you get out of the code and multiply it by the appropriate values
    to get it 'unyt-ified' with the correct units.

    Parameters
    ----------
    filename : Path
        Name of file to read units from.

    handle : h5py.File, optional
        The h5py file handle, optional. Will open a new handle with the
        filename if required.
    """

    mass: unyt.unyt_quantity
    length: unyt.unyt_quantity
    time: unyt.unyt_quantity
    current: unyt.unyt_quantity
    temperature: unyt.unyt_quantity

    def __init__(self, filename: Path, handle: h5py.File | None = None) -> None:
        super().__init__(filename, handle=handle)
        self.get_unit_dictionary()

        self._close_handle_if_manager()

        return


[docs]
    def get_unit_dictionary(self) -> None:
        """
        Store unit data and metadata.

        Length 1 arrays are used to store the unit data. This dictionary
        also contains the metadata information that connects the unyt
        objects to the names that are stored in the SWIFT snapshots.
        """
        self.units = {
            name: unyt.unyt_quantity(
                value[0], units=metadata.unit_types.unit_names_to_unyt[name]
            )
            for name, value in self.handle["Units"].attrs.items()
        }

        # We now unpack this into variables.
        self.mass = metadata.unit_types.find_nearest_base_unit(
            self.units["Unit mass in cgs (U_M)"], "mass"
        )
        self.length = metadata.unit_types.find_nearest_base_unit(
            self.units["Unit length in cgs (U_L)"], "length"
        )
        self.time = metadata.unit_types.find_nearest_base_unit(
            self.units["Unit time in cgs (U_t)"], "time"
        )
        self.current = metadata.unit_types.find_nearest_base_unit(
            self.units["Unit current in cgs (U_I)"], "current"
        )
        self.temperature = metadata.unit_types.find_nearest_base_unit(
            self.units["Unit temperature in cgs (U_T)"], "temperature"
        )




def _metadata_discriminator(
    filename: Path, units: SWIFTUnits, handle: h5py.File | None = None
) -> "SWIFTMetadata":
    """
    Determine the type of metadata object to construct.

    Parameters
    ----------
    filename : Path
        Name of the file to read metadata from.

    units : SWIFTUnits
        The units object associated with the file.

    handle : h5py.File, optional
        File handle to read metadata from.

    Returns
    -------
    SWIFTMetadata
        The appropriate metadata object for the file type.
    """
    # Old snapshots did not have OutputType, so we need to default to FullVolume
    if handle is None:
        with h5py.File(filename, "r") as local_handle:
            file_type = local_handle["Header"].attrs.get("OutputType", "FullVolume")
    else:
        file_type = handle["Header"].attrs.get("OutputType", "FullVolume")

    if isinstance(file_type, bytes):
        file_type = file_type.decode("utf-8")

    if file_type in ["FullVolume", "VolumeSubset"]:
        return SWIFTSnapshotMetadata(filename, units, handle=handle)
    elif file_type in ["LineOfSight"]:
        return SWIFTLineOfSightMetadata(filename, units, handle=handle)
    elif file_type in ["SOAP", "SOAPSubset"]:
        return SWIFTSOAPMetadata(filename, units, handle=handle)
    elif file_type in ["FOF", "FOFSubset"]:
        return SWIFTFOFMetadata(filename, units, handle=handle)
    else:
        raise ValueError(f"File type {file_type} not recognised.")



[docs]
class SWIFTSnapshotMetadata(SWIFTMetadata):
    """
    Provide a metadata interface for SWIFT snapshot files.

    For more documentation see :class:`~swiftsimio.metadata.objects.SWIFTMetadata`.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    units : SWIFTUnits
        Units object to use.

    handle : h5py.File, optional
        File handle to read from.
    """

    masking_valid: bool = True

    def __init__(
        self,
        filename: Path,
        units: SWIFTUnits | None = None,
        handle: h5py.File | None = None,
    ) -> None:
        super().__init__(filename, units=units, handle=handle)
        self.get_metadata()
        self.get_named_column_metadata()
        self.get_mapping_metadata()

        self.postprocess_header()

        self.load_groups()
        self.extract_cosmology()

        self._close_handle_if_manager()

        return


[docs]
    def get_named_column_metadata(self) -> None:
        """
        Load the custom named column metadata from SubgridScheme/NamedColumns.

        If name column didn't exist just set an empty :obj:`dict` instead.
        """
        try:
            data = self.handle["SubgridScheme/NamedColumns"]

            self.named_columns = {
                k: [x.decode("utf-8") for x in data[k][:]] for k in data.keys()
            }
        except KeyError:
            self.named_columns = {}

        return



[docs]
    def get_mapping_metadata(self) -> None:
        """
        Get the mappings based on the named columns (must have already been read).

        From the form:

        SubgridScheme/{X}To{Y}Mapping.

        Includes a hack of `Dust` -> `Grains` that will be deprecated.
        """
        try:
            possible_keys = self.handle["SubgridScheme"].keys()

            available_keys = [key for key in possible_keys if key.endswith("Mapping")]
            available_data = [
                self.handle[f"SubgridScheme/{key}"][:] for key in available_keys
            ]
        except KeyError:
            available_keys = []
            available_data = []

        # Keys have form {X}To{Y}Mapping
        regex = r"([a-zA-Z]*)To([a-zA-Z]*)Mapping"
        compiled = re.compile(regex)

        for key, data in zip(available_keys, available_data):
            matched = compiled.match(key)
            snake_case = _convert_snake_to_camel(key)

            if matched:
                x = matched.group(1)
                y = matched.group(2)

                if x == "Grain":
                    warnings.warn(
                        "Use of the GrainToElementMapping is deprecated, please use a "
                        "newer version of SWIFT to run this simulation.",
                        DeprecationWarning,
                    )

                    x = "Dust"

                named_column_name_x = [
                    key for key in self.named_columns.keys() if key.startswith(x)
                ][0]
                named_column_name_y = [
                    key for key in self.named_columns.keys() if key.startswith(y)
                ][0]

                setattr(
                    self,
                    snake_case,
                    MappingTable(
                        data=data,
                        named_columns_x=self.named_columns[named_column_name_x],
                        named_columns_y=self.named_columns[named_column_name_y],
                        named_columns_x_name=named_column_name_x,
                        named_columns_y_name=named_column_name_y,
                    ),
                )

        return


    @property
    def present_groups(self) -> list[str]:
        """
        Get the groups containing datasets that are present in the file.

        Returns
        -------
        list[str]
            List of present groups.
        """
        types = np.where(np.array(getattr(self, "has_type", self.num_part)) != 0)[0]
        return [f"PartType{i}" for i in types]

    @property
    def present_group_names(self) -> list[str]:
        """
        Get the names of the groups that we want to expose.

        Returns
        -------
        list[str]
            List of names to expose.
        """
        return [
            metadata.particle_types.particle_name_underscores[x]
            for x in self.present_groups
        ]

    @property
    def code_info(self) -> str:
        """
        Get and format a nicely printed set of code information.

        Formatting is as:

        Name (Git Branch)
        Git Revision
        Git Date

        Returns
        -------
        str
            The code information.
        """

        def format_string(param: str) -> str:
            """
            Fetch a string value from metadata and decode.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``code`` metadata.

            Returns
            -------
            str
                The decoded string value.
            """
            return self.code[param].decode("utf-8")

        output = (
            f"{format_string('Code')} ({format_string('Git Branch')})\n"
            f"{format_string('Git Revision')}\n"
            f"{format_string('Git Date')}"
        )

        return output

    @property
    def compiler_info(self) -> str:
        """
        Get and format information about the compiler.

        Formatting is as:

        Compiler Name (Compiler Version)
        MPI library

        Returns
        -------
        str
            The compiler information.
        """

        def format_string(param: str) -> str:
            """
            Fetch a string value from metadata, decode and format.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``code`` metadata.

            Returns
            -------
            str
                The decoded string value.
            """
            return self.code[param].decode("utf-8")

        output = (
            f"{format_string('Compiler Name')} ({format_string('Compiler Version')})\n"
            f"{format_string('MPI library')}"
        )

        return output

    @property
    def library_info(self) -> str:
        """
        Get and format information about the libraries used.

        Formatting is as:

        FFTW vFFTW library version
        GSL vGSL library version
        HDF5 vHDF5 library version

        Returns
        -------
        str
            The library information.
        """

        def format_string(param: str) -> str:
            """
            Fetch a string value from metadata, decode and format.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``code`` metadata.

            Returns
            -------
            str
                The decoded string value.
            """
            return self.code[f"{param} library version"].decode("utf-8")

        output = (
            f"FFTW v{format_string('FFTW')}\n"
            f"GSL v{format_string('GSL')}\n"
            f"HDF5 v{format_string('HDF5')}"
        )

        return output

    @property
    def hydro_info(self) -> str:
        r"""
        Get and format information about the hydro scheme.

        Formatting is as:

        Scheme
        Kernel function in DimensionD
        $\eta$ = Kernel eta (Kernel target N_ngb $N_{ngb}$)
        $C_{\rm CFL}$ = CFL parameter

        Returns
        -------
        str
            Hydro scheme information.
        """

        def format_float(param: str) -> str:
            """
            Fetch a float value from metadata and format.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The float value formatted to 2 decimal places.
            """
            return f"{self.hydro_scheme[param][0]:4.2f}"

        def get_int(param: str) -> int:
            """
            Fetch an integer value from the metadata.

            Parameters
            ----------
            param : int
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The integer value.
            """
            return int(self.hydro_scheme[param][0])

        def format_string(param: str) -> str:
            """
            Fetch a string value from metadata and decode.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The decoded string value.
            """
            return self.hydro_scheme[param].decode("utf-8")

        output = (
            f"{format_string('Scheme')}\n"
            f"{format_string('Kernel function')} in {get_int('Dimension')}D\n"
            rf"$\eta$ = {format_float('Kernel eta')} "
            rf"({format_float('Kernel target N_ngb')} $N_{{ngb}}$)"
            "\n"
            rf"$C_{{\rm CFL}}$ = {format_float('CFL parameter')}"
        )

        return output

    @property
    def viscosity_info(self) -> str:
        r"""
        Get and format information about the viscosity scheme.

        Formatting is as:

        Viscosity Model
        $\alpha_{V, 0}$ = Alpha viscosity, $\ell_V$ = Viscosity decay length \
        [internal units], $\beta_V$ = Beta viscosity
        Alpha viscosity (min) < $\alpha_V$ < Alpha viscosity (max)

        Returns
        -------
        str
            Viscosity scheme information.
        """

        def format_float(param: str) -> str:
            """
            Fetch a float value from metadata and format.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The float value formatted to 2 decimal places.
            """
            return f"{self.hydro_scheme[param][0]:4.2f}"

        def format_string(param: str) -> str:
            """
            Fetch a string value from metadata and decode.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The decoded string value.
            """
            return self.hydro_scheme[param].decode("utf-8")

        output = (
            f"{format_string('Viscosity Model')}\n"
            rf"$\alpha_{{V, 0}}$ = {format_float('Alpha viscosity')}, "
            rf"$\ell_V$ = {format_float('Viscosity decay length [internal units]')}, "
            rf"$\beta_V$ = {format_float('Beta viscosity')}"
            "\n"
            rf"{format_float('Alpha viscosity (min)')} < $\alpha_V$ < "
            rf"{format_float('Alpha viscosity (max)')}"
        )

        return output

    @property
    def diffusion_info(self) -> str:
        r"""
        Get and format information about the diffusion scheme.

        Formatting is as:

        $\alpha_{D, 0}$ = Diffusion alpha, $\beta_D$ = Diffusion beta
        Diffusion alpha (min) < $\alpha_D$ < Diffusion alpha (max)

        Returns
        -------
        str
            Formatted diffusion scheme information.
        """

        def format_float(param: str) -> str:
            """
            Fetch a float value from metadata and format.

            Parameters
            ----------
            param : str
                The name of the field to retrieve from the ``hydro_scheme`` metadata.

            Returns
            -------
            str
                The float value formatted to 2 decimal places.
            """
            return f"{self.hydro_scheme[param][0]:4.2f}"

        output = (
            rf"$\alpha_{{D, 0}}$ = {format_float('Diffusion alpha')}, "
            rf"$\beta_D$ = {format_float('Diffusion beta')}"
            "\n"
            rf"${format_float('Diffusion alpha (min)')} < "
            rf"\alpha_D < {format_float('Diffusion alpha (max)')}$"
        )

        return output

    @property
    def partial_snapshot(self) -> bool:
        """
        Check if this is a partial (e.g. a "x.0.hdf5" file).

        Returns
        -------
        bool
            ``True`` if the file is a partial file, else ``False``.
        """
        # Partial snapshots have num_files_per_snapshot set to 1. Virtual snapshots
        # collating multiple sub-snapshots together have num_files_per_snapshot = 1.

        return self.num_files_per_snapshot > 1


[docs]
    @staticmethod
    def get_nice_name(group: str) -> str:
        """
        Convert the group name to a user-readable name.

        Parameters
        ----------
        group : str
            The group name as used in the hdf5 file.

        Returns
        -------
        str
            The user-readable version of the name.
        """
        return metadata.particle_types.particle_name_class[group]





[docs]
class SWIFTLineOfSightMetadata(SWIFTSnapshotMetadata):
    """
    Provide a metadata interface for SWIFT line-of-sight files.

    For more documentation see :class:`~swiftsimio.metadata.objects.SWIFTMetadata`.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    units : SWIFTUnits
        Units object to use.

    handle : h5py.File, optional
        File handle to read from.
    """

    masking_valid: bool = False
    expose_group_attributes: bool = True

    @property
    def present_groups(self) -> list[str]:
        """
        Get the groups containing datasets that are present in the file.

        Returns
        -------
        list[str]
            List of LOS groups.
        """
        if self._handle:
            keys = self.handle.keys()
        else:
            with h5py.File(self.filename, "r") as handle:
                keys = list(handle.keys())

        return sorted([group for group in keys if group.startswith("LOS_")])

    @property
    def present_group_names(self) -> list[str]:
        """
        Get the names of the groups that we want to expose.

        Returns
        -------
        list[str]
            List of names to expose as attributes.
        """
        return [group.lower() for group in self.present_groups]


[docs]
    @staticmethod
    def get_nice_name(group: str) -> str:
        """
        Convert the group name to a user-readable name.

        Parameters
        ----------
        group : str
            The group name as used in the hdf5 file.

        Returns
        -------
        str
            The user-readable version of the name.
        """
        return group.replace("_", "")



[docs]
    @staticmethod
    def get_group_attribute_units(name: str) -> Callable[["SWIFTUnits"], unyt.Unit]:
        """
        Get units for line-of-sight group attributes.

        Parameters
        ----------
        name : str
            Public attribute name as exposed in swiftsimio.

        Returns
        -------
        Callable[[SWIFTUnits], unyt.Unit]
            Callable that maps a ``SWIFTUnits`` object to the desired unit.
        """
        unit_map = {
            "xpos": lambda units: units.length,
            "ypos": lambda units: units.length,
            "zpos": lambda units: units.length,
        }
        return unit_map.get(name, lambda units: unyt.dimensionless)



[docs]
    @staticmethod
    def get_group_attribute_comoving(name: str) -> bool:
        """
        Get comoving-state metadata for line-of-sight group attributes.

        Parameters
        ----------
        name : str
            Public attribute name as exposed in swiftsimio.

        Returns
        -------
        bool
            ``True`` if the value is stored in comoving units, else ``False``.
        """
        comoving_attributes = {"xpos", "ypos", "zpos"}
        return name in comoving_attributes



[docs]
    def get_group_attribute_cosmo_factor(self, name: str) -> cosmo_factor:
        """
        Get cosmological conversion metadata for LOS group attributes.

        Parameters
        ----------
        name : str
            Public attribute name as exposed in swiftsimio.

        scale_factor : float
            Scale factor of the snapshot.

        Returns
        -------
        swiftsimio.objects.cosmo_factor
            Cosmological conversion factor.
        """
        exponent_map = {"xpos": 1.0, "ypos": 1.0, "zpos": 1.0}
        exponent = exponent_map.get(name, 0.0)
        return cosmo_factor.create(self.scale_factor, exponent)





[docs]
class SWIFTFOFMetadata(SWIFTMetadata):
    """
    Provide a metadata interface for FOF catalogue files.

    For more documentation see :class:`~swiftsimio.metadata.objects.SWIFTMetadata`.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    units : SWIFTUnits
        Units object to use.

    handle : h5py.File, optional
        File handle to read from.
    """

    homogeneous_arrays: bool = True

    def __init__(
        self,
        filename: Path,
        units: SWIFTUnits | None = None,
        handle: h5py.File | None = None,
    ) -> None:
        super().__init__(filename, units=units, handle=handle)
        self.get_metadata()
        self.postprocess_header()

        self.load_groups()
        self.extract_cosmology()

        self._close_handle_if_manager()

        return

    @property
    def present_groups(self) -> list[str]:
        """
        The groups containing datasets that are present in the file.

        Returns
        -------
        list[str]
            List of available subhalo types.
        """
        return ["Groups"]

    @property
    def present_group_names(self) -> list[str]:
        """
        Provide the names of the groups that we want to expose.

        Returns
        -------
        list[str]
            List of the available groups.
        """
        return ["fof_groups"]


[docs]
    @staticmethod
    def get_nice_name(group: str) -> str:
        """
        Convert the group name to a user-readable name.

        Parameters
        ----------
        group : str
            The group name as used in the hdf5 file.

        Returns
        -------
        str
            The user-readable version of the name.
        """
        return "FOFGroups"





[docs]
class SWIFTSOAPMetadata(SWIFTMetadata):
    """
    Provide a metadata interface for SOAP catalogue files.

    For more documentation see :class:`~swiftsimio.metadata.objects.SWIFTMetadata`.

    Parameters
    ----------
    filename : Path
        Filename to read metadata from.

    units : SWIFTUnits
        Units object to use.

    handle : h5py.File, optional
        File handle to read from.
    """

    masking_valid: bool = True
    shared_cell_counts: str = "Subhalos"
    homogeneous_arrays: bool = True

    def __init__(
        self,
        filename: Path,
        units: SWIFTUnits | None = None,
        handle: h5py.File | None = None,
    ) -> None:
        super().__init__(filename, units=units, handle=handle)
        self.get_metadata()
        self.postprocess_header()
        self.unpack_subhalo_number()

        self.load_groups()
        self.extract_cosmology()

        self._close_handle_if_manager()

        return


[docs]
    def unpack_subhalo_number(self) -> None:
        """Set the subhalo count."""
        self.n_subhalos = int(self.num_subhalo[0])
        return


    @property
    def present_groups(self) -> list[str]:
        """
        The groups containing datasets that are present in the file.

        Returns
        -------
        list[str]
            List of available subhalo types.
        """
        return self.subhalo_types

    @property
    def present_group_names(self) -> list[str]:
        """
        Provide the names of the groups that we want to expose.

        Returns
        -------
        list[str]
            List of the available groups.
        """
        return [
            metadata.soap_types.get_soap_name_underscore(x) for x in self.present_groups
        ]


[docs]
    @staticmethod
    def get_nice_name(group: str) -> str:
        """
        Get the de-acronymed name of a specified group.

        Parameters
        ----------
        group : str
            The name as it appears in the SOAP file.

        Returns
        -------
        str
            The de-acronymed name.
        """
        return metadata.soap_types.get_soap_name_nice(group)