Source code for swiftsimio.visualisation.projection_backends.subsampled_extreme

"""
Sub-sampled smoothing kernel with each kernel evaluated
at least 64^2 times. This uses a dithered pre-calculated
kernel for cell overlaps at small scales, and at large
scales uses subsampling.

Uses double precision.
"""


"""
The original smoothing code. This provides a paranoid supersampling
of the kernel.
"""


from typing import Union
from math import sqrt, ceil
from numpy import float32, float64, int32, zeros, ndarray

from swiftsimio.accelerated import jit, NUM_THREADS, prange
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_double_precision as kernel,
)
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_constant,
    kernel_gamma,
)

kernel_constant = float64(kernel_constant)
kernel_gamma = float64(kernel_gamma)


@jit(nopython=True, fastmath=True)
def scatter(
    x: float64,
    y: float64,
    m: float32,
    h: float32,
    res: int,
    box_x: float64 = 0.0,
    box_y: float64 = 0.0,
) -> ndarray:
    """
    Creates a weighted scatter plot

    Computes contributions to a pixel grid from particles with positions
    (`x`, `y`) with smoothing lengths `h`, weighted by quantities `m`.
    This includes periodic boundary effects.

    Parameters
    ----------
    x : np.array[float64]
        array of x-positions of the particles. Must be bounded by [0, 1].

    y : np.array[float64]
        array of y-positions of the particles. Must be bounded by [0, 1].

    m : np.array[float32]
        array of masses (or otherwise weights) of the particles

    h : np.array[float32]
        array of smoothing lengths of the particles

    res : int
        the number of pixels along one axis, i.e. this returns a square
        of res * res.

    box_x : float64
        box size in x, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    box_y : float64
        box size in y, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    Returns
    -------
    np.array[float64]
        pixel grid of quantity

    See Also
    --------
    scatter_parallel : Parallel implementation of this function

    Notes
    -----
    Explicitly defining the types in this function allows for a 25-50%
    performance improvement. In our testing, using numpy floats and integers
    is also an improvement over using the numba ones.

    Uses 4x the number of sampling points as in scatter in subsampled.py
    """
    # Output array for our image
    image = zeros((res, res), dtype=float64)
    maximal_array_index = int32(res) - 1

    # Change that integer to a float, we know that our x, y are bounded
    # by [0, 1].
    float_res = float64(res)
    pixel_width = 1.0 / float_res

    # Pre-calculate this constant for use with the above
    inverse_cell_area = float_res * float_res

    # Minimum number of kernel evaluations for each particle (this x2 squared)
    MIN_KERNEL_EVALUATIONS = 32
    float_MIN_KERNEL_EVALUATIONS = float64(MIN_KERNEL_EVALUATIONS)

    # Dithered kernel evaluations - note it is a 2x DITHER_EVALUATIONS^2 grid
    DITHER_EVALUATIONS = 64
    float_DITHER_EVALUATIONS = float64(DITHER_EVALUATIONS)
    float_DITHER_EVALUATIONS_inv = 1.0 / float_DITHER_EVALUATIONS

    # Pre-compute a min_kernel_evaluations x min_kernel_evaluations square
    # for dithering in cases of small kernel overlap
    dithered_kernel = zeros(
        (2 * DITHER_EVALUATIONS, 2 * DITHER_EVALUATIONS), dtype=float64
    )

    # Fill with kernel evaluations
    for x_dither_cell in range(2 * DITHER_EVALUATIONS):
        x_float = float64(x_dither_cell) + 0.5
        x_dither_distance = x_float - float_DITHER_EVALUATIONS
        x_dither_distance_squared = x_dither_distance * x_dither_distance

        for y_dither_cell in range(2 * DITHER_EVALUATIONS):
            y_float = float64(y_dither_cell) + 0.5
            y_dither_distance = y_float - float_DITHER_EVALUATIONS
            y_dither_distance_squared = y_dither_distance * y_dither_distance

            r = sqrt(x_dither_distance_squared + y_dither_distance_squared)

            dithered_kernel[x_dither_cell, y_dither_cell] += kernel(
                r, H=float_DITHER_EVALUATIONS
            )

    # May as well have this correctly normed.
    dithered_kernel *= inverse_cell_area / dithered_kernel.sum()

    if box_x == 0.0:
        xshift_min = 0
        xshift_max = 1
    else:
        xshift_min = -1
        xshift_max = 2

    if box_y == 0.0:
        yshift_min = 0
        yshift_max = 1
    else:
        yshift_min = -1
        yshift_max = 2

    for x_pos_original, y_pos_original, mass, hsml in zip(x, y, m, h):
        # loop over periodic copies of this particle
        for xshift in range(xshift_min, xshift_max):
            for yshift in range(yshift_min, yshift_max):
                x_pos = x_pos_original + xshift * box_x
                y_pos = y_pos_original + yshift * box_y

                # Calculate the cell that this particle lives in; use the 64 bit
                # version of the resolution as this is the same type as the positions
                particle_cell_x = int32(float_res * x_pos)
                particle_cell_y = int32(float_res * y_pos)

                # SWIFT stores hsml as the FWHM.
                float_mass = float64(mass)
                kernel_width = float64(kernel_gamma * hsml)

                # The number of cells that this kernel spans
                float_cells_spanned = 1.0 + kernel_width * float_res
                cells_spanned = int32(float_cells_spanned)

                if (
                    particle_cell_x + cells_spanned < 0
                    or particle_cell_x - cells_spanned > maximal_array_index
                    or particle_cell_y + cells_spanned < 0
                    or particle_cell_y - cells_spanned > maximal_array_index
                ):
                    # Can happily skip this particle
                    continue

                # If the particle is too small, then it's very likely that:
                # a) it does not lie on a boundary
                # b) evaluating it over this boundary would cause significant errors
                if kernel_width <= 0.25 * pixel_width:
                    # Here we check for overlaps between this kernel and boundaries.
                    # If they exist, we must use the sub-sampled kernel.
                    dx_left = x_pos - float64(particle_cell_x)
                    dx_right = float64(particle_cell_x) + 1.0 - x_pos
                    dy_down = y_pos - float64(particle_cell_y)
                    dy_up = float64(particle_cell_y) + 1.0 - y_pos

                    overlaps_left = dx_left < kernel_width
                    overlaps_right = dx_right < kernel_width
                    overlaps_down = dy_down < kernel_width
                    overlaps_up = dy_up < kernel_width

                    if not (
                        overlaps_left or overlaps_right or overlaps_down or overlaps_up
                    ):
                        # Very simple case - no overlaps.
                        image[particle_cell_x, particle_cell_y] += (
                            mass * inverse_cell_area
                        )
                    else:
                        # Use pre-calculated kernel with a basic dither to lay down
                        # overlap
                        for x_dither_cell in range(0, 2 * DITHER_EVALUATIONS):
                            float_x_dither_cell = float64(x_dither_cell)
                            pixel_x = int32(
                                float_res
                                * (
                                    x_pos
                                    + (
                                        float_x_dither_cell
                                        * float_DITHER_EVALUATIONS_inv
                                        - 1.0
                                    )
                                    * kernel_width
                                )
                            )

                            for y_dither_cell in range(0, 2 * DITHER_EVALUATIONS):
                                float_y_dither_cell = float64(y_dither_cell)
                                pixel_y = int32(
                                    float_res
                                    * (
                                        y_pos
                                        + (
                                            float_y_dither_cell
                                            * float_DITHER_EVALUATIONS_inv
                                            - 1.0
                                        )
                                        * kernel_width
                                    )
                                )

                                if (
                                    pixel_x >= 0
                                    and pixel_x <= maximal_array_index
                                    and pixel_y >= 0
                                    and pixel_y <= maximal_array_index
                                ):
                                    image[pixel_x, pixel_y] += (
                                        float_mass
                                        * dithered_kernel[x_dither_cell, y_dither_cell]
                                    )
                else:
                    # The number of times each pixel is subsampled.
                    subsample_factor = max(
                        1,
                        2
                        * int32(
                            ceil(float_MIN_KERNEL_EVALUATIONS / float_cells_spanned)
                        ),
                    )
                    float_subsample_factor = float64(subsample_factor)
                    inv_float_subsample_factor = 1.0 / float_subsample_factor
                    inv_float_subsample_factor_square = (
                        inv_float_subsample_factor * inv_float_subsample_factor
                    )

                    # Now we loop over the square of cells that the kernel lives in
                    for cell_x in range(
                        # Ensure that the lowest x value is 0, otherwise we'll segfault
                        max(0, particle_cell_x - cells_spanned),
                        # Ensure that the highest x value lies within the array bounds,
                        # otherwise we'll segfault (oops).
                        min(
                            particle_cell_x + cells_spanned + 1,
                            maximal_array_index + 1,
                        ),
                    ):
                        float_cell_x = float64(cell_x)

                        for cell_y in range(
                            max(0, particle_cell_y - cells_spanned),
                            min(
                                particle_cell_y + cells_spanned + 1,
                                maximal_array_index + 1,
                            ),
                        ):
                            float_cell_y = float64(cell_y)

                            # Now we subsample the pixels to get a more accurate
                            # determination of the kernel weight. We take the mean of
                            # the kernel evaluations within a given pixel and apply
                            # this as the true 'kernel evaluation'.
                            kernel_eval = float64(0.0)

                            for subsample_x in range(0, subsample_factor):
                                subsample_position_x = (
                                    float64(subsample_x) + 0.5
                                ) * inv_float_subsample_factor
                                distance_x = (
                                    float_cell_x + subsample_position_x
                                ) * pixel_width - x_pos
                                distance_x_2 = distance_x * distance_x

                                for subsample_y in range(0, subsample_factor):
                                    subsample_position_y = (
                                        float64(subsample_y) + 0.5
                                    ) * inv_float_subsample_factor
                                    distance_y = (
                                        float_cell_y + subsample_position_y
                                    ) * pixel_width - y_pos
                                    distance_y_2 = distance_y * distance_y

                                    r = sqrt(distance_x_2 + distance_y_2)

                                    kernel_eval += kernel(r, kernel_width)

                            image[cell_x, cell_y] += (
                                float_mass
                                * kernel_eval
                                * inv_float_subsample_factor_square
                            )

    return image
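# Minimal direct-call sketch (illustrative only): scatter expects positions
# already rescaled to [0, 1] and returns a res x res float64 grid of the
# projected quantity per unit (rescaled) area. The particle values below are
# made up for demonstration.
#
#     from numpy import array, float32, float64
#
#     image = scatter(
#         x=array([0.2, 0.5, 0.8], dtype=float64),
#         y=array([0.3, 0.5, 0.7], dtype=float64),
#         m=array([1.0, 1.0, 1.0], dtype=float32),
#         h=array([0.05, 0.2, 0.01], dtype=float32),
#         res=256,
#     )
#     # image.sum() / 256**2 should approximately recover the total weight,
#     # since each pixel holds weight multiplied by the inverse cell area.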
@jit(nopython=True, fastmath=True, parallel=True)
def scatter_parallel(
    x: float64,
    y: float64,
    m: float32,
    h: float32,
    res: int,
    box_x: float64 = 0.0,
    box_y: float64 = 0.0,
) -> ndarray:
    """
    Parallel implementation of scatter

    Creates a weighted scatter plot. Computes contributions from particles
    with positions (`x`, `y`) with smoothing lengths `h`, weighted by
    quantities `m`. This includes periodic boundary effects.

    Parameters
    ----------
    x : np.array[float64]
        array of x-positions of the particles. Must be bounded by [0, 1].

    y : np.array[float64]
        array of y-positions of the particles. Must be bounded by [0, 1].

    m : np.array[float32]
        array of masses (or otherwise weights) of the particles

    h : np.array[float32]
        array of smoothing lengths of the particles

    res : int
        the number of pixels along one axis, i.e. this returns a square
        of res * res.

    box_x : float64
        box size in x, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    box_y : float64
        box size in y, in the same rescaled length units as x and y. Used
        for periodic wrapping.

    Returns
    -------
    np.array[float64]
        pixel grid of quantity

    See Also
    --------
    scatter : Creates 2D scatter plot from SWIFT data

    Notes
    -----
    Explicitly defining the types in this function allows for a 25-50%
    performance improvement. In our testing, using numpy floats and integers
    is also an improvement over using the numba ones.

    Uses 4x the number of sampling points as in scatter_parallel in subsampled.py
    """
    # Same as scatter, but executes in parallel! This is actually trivial,
    # we just make NUM_THREADS images and add them together at the end.

    number_of_particles = x.size
    core_particles = number_of_particles // NUM_THREADS

    output = zeros((res, res), dtype=float64)

    for thread in prange(NUM_THREADS):
        # Left edge is easy, just start at 0 and go to 'final'
        left_edge = thread * core_particles

        # Right edge is harder in case of left over particles...
        right_edge = thread + 1

        if right_edge == NUM_THREADS:
            right_edge = number_of_particles
        else:
            right_edge *= core_particles

        output += scatter(
            x=x[left_edge:right_edge],
            y=y[left_edge:right_edge],
            m=m[left_edge:right_edge],
            h=h[left_edge:right_edge],
            res=res,
            box_x=box_x,
            box_y=box_y,
        )

    return output
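# Parallel usage sketch (illustrative only): scatter_parallel shares scatter's
# signature and simply splits the particle arrays into NUM_THREADS chunks.
# NUM_THREADS comes from swiftsimio.accelerated and typically follows the
# OMP_NUM_THREADS environment variable, so set that before importing to
# control the number of chunks, e.g.:
#
#     image = scatter_parallel(x=x, y=y, m=m, h=h, res=1024, box_x=1.0, box_y=1.0)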