Source code for swiftsimio.visualisation.projection_backends.subsampled_extreme

"""
Sub-sampled smoothing kernel with each kernel evaluated
at least 64^2 times. This uses a dithered pre-calculated
kernel for cell overlaps at small scales, and at large
scales uses subsampling.

Uses double precision.
"""


"""
The original smoothing code. This provides a paranoid supersampling
of the kernel.
"""


from typing import Union
from math import sqrt, ceil
from numpy import float32, float64, int32, zeros, ndarray

from swiftsimio.accelerated import jit, NUM_THREADS, prange
from swiftsimio.accelerated import jit, NUM_THREADS, prange
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_double_precision as kernel,
)
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_constant,
    kernel_gamma,
)

kernel_constant = float64(kernel_constant)
kernel_gamma = float64(kernel_gamma)


[docs]@jit(nopython=True, fastmath=True)
def scatter(
    x: float64,
    y: float64,
    m: float32,
    h: float32,
    res: int,
    box_x: float64 = 0.0,
    box_y: float64 = 0.0,
) -> ndarray:
    """
    Creates a weighted scatter plot

    Computes contributions to from particles with positions
    (`x`,`y`) with smoothing lengths `h` weighted by quantities `m`.
    This includes periodic boundary effects.

    Parameters
    ----------

    x : np.array[float64]
        array of x-positions of the particles. Must be bounded by [0, 1].

    y : np.array[float64]
        array of y-positions of the particles. Must be bounded by [0, 1].

    m : np.array[float32]
        array of masses (or otherwise weights) of the particles

    h : np.array[float32]
        array of smoothing lengths of the particles

    res : int
        the number of pixels along one axis, i.e. this returns a square
        of res * res.

    box_x: float64
        box size in x, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    box_y: float64
        box size in y, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    Returns
    -------

    np.array[float32, float32, float32]
        pixel grid of quantity

    See Also
    --------

    scatter_parallel : Parallel implementation of this function

    Notes
    -----

    Explicitly defining the types in this function allows
    for a 25-50% performance improvement. In our testing, using numpy
    floats and integers is also an improvement over using the numba ones.

    Uses 4x the number of sampling points as in scatter in subsampled.py
    """
    # Output array for our image
    image = zeros((res, res), dtype=float64)
    maximal_array_index = int32(res) - 1

    # Change that integer to a float, we know that our x, y are bounded
    # by [0, 1].
    float_res = float64(res)
    pixel_width = 1.0 / float_res

    # Pre-calculate this constant for use with the above
    inverse_cell_area = float_res * float_res

    # Minimum number of kernel evaluations for each particle (this x2 squared)
    MIN_KERNEL_EVALUATIONS = 32
    float_MIN_KERNEL_EVALUATIONS = float64(MIN_KERNEL_EVALUATIONS)

    # Dithered kernel evaluations - note it is a 2x DITHER_EVALUATIONS^2 grid
    DITHER_EVALUATIONS = 64
    float_DITHER_EVALUATIONS = float64(DITHER_EVALUATIONS)
    float_DITHER_EVALUATIONS_inv = 1.0 / float_DITHER_EVALUATIONS

    # Pre-comute a min_kernel_evaluations x min_kernel_evaluations square
    # for dithering in cases of small kernel overlap
    dithered_kernel = zeros(
        (2 * DITHER_EVALUATIONS, 2 * DITHER_EVALUATIONS), dtype=float64
    )

    # Fill with kernel evaluations
    for x_dither_cell in range(2 * DITHER_EVALUATIONS):
        x_float = float64(x_dither_cell) + 0.5
        x_dither_distance = x_float - float_DITHER_EVALUATIONS
        x_dither_distance_squared = x_dither_distance * x_dither_distance
        for y_dither_cell in range(2 * DITHER_EVALUATIONS):
            y_float = float64(y_dither_cell) + 0.5
            y_dither_distance = y_float - float_DITHER_EVALUATIONS
            y_dither_distance_squared = y_dither_distance * y_dither_distance

            r = sqrt(x_dither_distance_squared + y_dither_distance_squared)
            dithered_kernel[x_dither_cell, y_dither_cell] += kernel(
                r, H=float_DITHER_EVALUATIONS
            )

    # May as well have this correctly normed.
    dithered_kernel *= inverse_cell_area / dithered_kernel.sum()

    if box_x == 0.0:
        xshift_min = 0
        xshift_max = 1
    else:
        xshift_min = -1
        xshift_max = 2
    if box_y == 0.0:
        yshift_min = 0
        yshift_max = 1
    else:
        yshift_min = -1
        yshift_max = 2

    for x_pos_original, y_pos_original, mass, hsml in zip(x, y, m, h):
        # loop over periodic copies of this particle
        for xshift in range(xshift_min, xshift_max):
            for yshift in range(yshift_min, yshift_max):
                x_pos = x_pos_original + xshift * box_x
                y_pos = y_pos_original + yshift * box_y

                # Calculate the cell that this particle; use the 64 bit version of the
                # resolution as this is the same type as the positions
                particle_cell_x = int32(float_res * x_pos)
                particle_cell_y = int32(float_res * y_pos)

                # SWIFT stores hsml as the FWHM.
                float_mass = float64(mass)
                kernel_width = float64(kernel_gamma * hsml)

                # The number of cells that this kernel spans
                float_cells_spanned = 1.0 + kernel_width * float_res
                cells_spanned = int32(float_cells_spanned)

                if (
                    particle_cell_x + cells_spanned < 0
                    or particle_cell_x - cells_spanned > maximal_array_index
                    or particle_cell_y + cells_spanned < 0
                    or particle_cell_y - cells_spanned > maximal_array_index
                ):
                    # Can happily skip this particle
                    continue

                # If the particle is too small, then it's very likely that:
                # a) it does not lie on a boundary
                # b) evaluating it over this boundary would cause significant errors
                if kernel_width <= 0.25 * pixel_width:
                    # Here we check for overlaps between this kernel and boundaries.
                    # If they exist, we must use the sub-sampled kernel.

                    dx_left = x_pos - float64(particle_cell_x)
                    dx_right = float64(particle_cell_x) + 1.0 - x_pos
                    dy_down = y_pos - float64(particle_cell_y)
                    dy_up = float64(particle_cell_y) + 1.0 - y_pos

                    overlaps_left = dx_left < kernel_width
                    overlaps_right = dx_right < kernel_width
                    overlaps_down = dy_down < kernel_width
                    overlaps_up = dy_up < kernel_width

                    if not (
                        overlaps_left or overlaps_right or overlaps_down or overlaps_up
                    ):
                        # Very simple case - no overlaps.
                        image[particle_cell_x, particle_cell_y] += (
                            mass * inverse_cell_area
                        )
                    else:
                        # Use pre-calculated kernel with a basic dither to lay down
                        # overlap
                        for x_dither_cell in range(0, 2 * DITHER_EVALUATIONS):
                            float_x_dither_cell = float64(x_dither_cell)
                            pixel_x = int32(
                                float_res
                                * (
                                    x_pos
                                    + (
                                        float_x_dither_cell
                                        * float_DITHER_EVALUATIONS_inv
                                        - 1.0
                                    )
                                    * kernel_width
                                )
                            )
                            for y_dither_cell in range(0, 2 * DITHER_EVALUATIONS):
                                float_y_dither_cell = float64(y_dither_cell)
                                pixel_y = int32(
                                    float_res
                                    * (
                                        y_pos
                                        + (
                                            float_y_dither_cell
                                            * float_DITHER_EVALUATIONS_inv
                                            - 1.0
                                        )
                                        * kernel_width
                                    )
                                )

                                if (
                                    pixel_x >= 0
                                    and pixel_x <= maximal_array_index
                                    and pixel_y >= 0
                                    and pixel_y <= maximal_array_index
                                ):
                                    image[pixel_x, pixel_y] += (
                                        float_mass
                                        * dithered_kernel[x_dither_cell, y_dither_cell]
                                    )

                else:
                    # The number of times each pixel is subsampled.
                    subsample_factor = max(
                        1,
                        2
                        * int32(
                            ceil(float_MIN_KERNEL_EVALUATIONS / float_cells_spanned)
                        ),
                    )
                    float_subsample_factor = float64(subsample_factor)
                    inv_float_subsample_factor = 1.0 / float_subsample_factor
                    inv_float_subsample_factor_square = (
                        inv_float_subsample_factor * inv_float_subsample_factor
                    )

                    # Now we loop over the square of cells that the kernel lives in
                    for cell_x in range(
                        # Ensure that the lowest x value is 0, otherwise we'll segfault
                        max(0, particle_cell_x - cells_spanned),
                        # Ensure that the highest x value lies within the array bounds,
                        # otherwise we'll segfault (oops).
                        min(
                            particle_cell_x + cells_spanned + 1, maximal_array_index + 1
                        ),
                    ):
                        float_cell_x = float64(cell_x)
                        for cell_y in range(
                            max(0, particle_cell_y - cells_spanned),
                            min(
                                particle_cell_y + cells_spanned + 1,
                                maximal_array_index + 1,
                            ),
                        ):
                            float_cell_y = float64(cell_y)
                            # Now we subsample the pixels to get a more accurate determination
                            # of the kernel weight. We take the mean of the kernel evaluations
                            # within a given pixel and apply this as the true 'kernel evaluation'.
                            kernel_eval = float64(0.0)

                            for subsample_x in range(0, subsample_factor):
                                subsample_position_x = (
                                    float64(subsample_x) + 0.5
                                ) * inv_float_subsample_factor

                                distance_x = (
                                    float_cell_x + subsample_position_x
                                ) * pixel_width - x_pos

                                distance_x_2 = distance_x * distance_x

                                for subsample_y in range(0, subsample_factor):
                                    subsample_position_y = (
                                        float64(subsample_y) + 0.5
                                    ) * inv_float_subsample_factor

                                    distance_y = (
                                        float_cell_y + subsample_position_y
                                    ) * pixel_width - y_pos

                                    distance_y_2 = distance_y * distance_y

                                    r = sqrt(distance_x_2 + distance_y_2)
                                    kernel_eval += kernel(r, kernel_width)

                            image[cell_x, cell_y] += (
                                float_mass
                                * kernel_eval
                                * inv_float_subsample_factor_square
                            )

    return image


[docs]@jit(nopython=True, fastmath=True, parallel=True)
def scatter_parallel(
    x: float64,
    y: float64,
    m: float32,
    h: float32,
    res: int,
    box_x: float64 = 0.0,
    box_y: float64 = 0.0,
) -> ndarray:
    """
    Parallel implementation of scatter

    Creates a weighted scatter plot. Computes contributions from
    particles with positions (`x`,`y`) with smoothing lengths `h`
    weighted by quantities `m`.
    This includes periodic boundary effects.

    Parameters
    ----------
    x : np.array[float64]
        array of x-positions of the particles. Must be bounded by [0, 1].

    y : np.array[float64]
        array of y-positions of the particles. Must be bounded by [0, 1].

    m : np.array[float32]
        array of masses (or otherwise weights) of the particles

    h : np.array[float32]
        array of smoothing lengths of the particles

    res : int
        the number of pixels along one axis, i.e. this returns a square
        of res * res.

    box_x: float64
        box size in x, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    box_y: float64
        box size in y, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    Returns
    -------

    np.array[float32, float32, float32]
        pixel grid of quantity

    See Also
    --------

    scatter : Creates 2D scatter plot from SWIFT data

    Notes
    -----

    Explicitly defining the types in this function allows
    for a 25-50% performance improvement. In our testing, using numpy
    floats and integers is also an improvement over using the numba ones.

    Uses 4x the number of sampling points as in scatter_parallel in subsampled.py
    """
    # Same as scatter, but executes in parallel! This is actually trivial,
    # we just make NUM_THREADS images and add them together at the end.

    number_of_particles = x.size
    core_particles = number_of_particles // NUM_THREADS

    output = zeros((res, res), dtype=float64)

    for thread in prange(NUM_THREADS):
        # Left edge is easy, just start at 0 and go to 'final'
        left_edge = thread * core_particles

        # Right edge is harder in case of left over particles...
        right_edge = thread + 1

        if right_edge == NUM_THREADS:
            right_edge = number_of_particles
        else:
            right_edge *= core_particles

        output += scatter(
            x=x[left_edge:right_edge],
            y=y[left_edge:right_edge],
            m=m[left_edge:right_edge],
            h=h[left_edge:right_edge],
            res=res,
            box_x=box_x,
            box_y=box_y,
        )

    return output