Source code for swiftsimio.visualisation.projection_backends.subsampled_extreme

"""
Sub-samples smoothing kernel with at least 64^2 samples.

Sub-sampled smoothing kernel with each kernel evaluated at least 64^2 times. This uses a
dithered pre-calculated kernel for cell overlaps at small scales, and at large scales
uses subsampling.

Uses double precision.

This is the original smoothing code. This provides a paranoid supersampling of the kernel.
"""

import numpy as np
from swiftsimio.accelerated import jit, NUM_THREADS, prange
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_double_precision as kernel,
)
from swiftsimio.visualisation.projection_backends.kernels import (
    kernel_constant,
    kernel_gamma,
)

kernel_constant = np.float64(kernel_constant)
kernel_gamma = np.float64(kernel_gamma)


[docs] @jit(nopython=True, fastmath=True) def scatter( x: np.float64, y: np.float64, m: np.float32, h: np.float32, res: int, box_x: np.float64 = 0.0, box_y: np.float64 = 0.0, ) -> np.ndarray: """ Create a weighted scatter plot. Computes contributions to from particles with positions (`x`,`y`) with smoothing lengths `h` weighted by quantities `m`. This includes periodic boundary effects. Parameters ---------- x : np.ndarray[np.float64] Array of x-positions of the particles. Must be bounded by [0, 1]. y : np.ndarray[np.float64] Array of y-positions of the particles. Must be bounded by [0, 1]. m : np.ndarray[np.float32] Array of masses (or otherwise weights) of the particles. h : np.ndarray[np.float32] Array of smoothing lengths of the particles. res : int The number of pixels along one axis, i.e. this returns a square of res * res. box_x : np.float64 Box size in x, in the same rescaled length units as x and y. Used for periodic wrapping. box_y : np.float64 Box size in y, in the same rescaled length units as x and y. Used for periodic wrapping. Returns ------- np.ndarray[np.float32, np.float32, np.float32] Pixel grid of quantity. See Also -------- scatter_parallel Parallel implementation of this function. Notes ----- Explicitly defining the types in this function allows for a 25-50% performance improvement. In our testing, using numpy floats and integers is also an improvement over using the numba ones. Uses 4x the number of sampling points as in scatter in ``subsampled.py``. """ # Output array for our image image = np.zeros((res, res), dtype=np.float64) maximal_array_index = np.int32(res) - 1 # Change that integer to a float, we know that our x, y are bounded # by [0, 1]. float_res = np.float64(res) pixel_width = 1.0 / float_res # Pre-calculate this constant for use with the above inverse_cell_area = float_res * float_res # Minimum number of kernel evaluations for each particle (this x2 squared) MIN_KERNEL_EVALUATIONS = 32 float_MIN_KERNEL_EVALUATIONS = np.float64(MIN_KERNEL_EVALUATIONS) # Dithered kernel evaluations - note it is a 2x DITHER_EVALUATIONS^2 grid DITHER_EVALUATIONS = 64 float_DITHER_EVALUATIONS = np.float64(DITHER_EVALUATIONS) float_DITHER_EVALUATIONS_inv = 1.0 / float_DITHER_EVALUATIONS # Pre-comute a min_kernel_evaluations x min_kernel_evaluations square # for dithering in cases of small kernel overlap dithered_kernel = np.zeros( (2 * DITHER_EVALUATIONS, 2 * DITHER_EVALUATIONS), dtype=np.float64 ) # Fill with kernel evaluations for x_dither_cell in range(2 * DITHER_EVALUATIONS): x_float = np.float64(x_dither_cell) + 0.5 x_dither_distance = x_float - float_DITHER_EVALUATIONS x_dither_distance_squared = x_dither_distance * x_dither_distance for y_dither_cell in range(2 * DITHER_EVALUATIONS): y_float = np.float64(y_dither_cell) + 0.5 y_dither_distance = y_float - float_DITHER_EVALUATIONS y_dither_distance_squared = y_dither_distance * y_dither_distance r = np.sqrt(x_dither_distance_squared + y_dither_distance_squared) dithered_kernel[x_dither_cell, y_dither_cell] += kernel( r, H=float_DITHER_EVALUATIONS ) # May as well have this correctly normed. dithered_kernel *= inverse_cell_area / dithered_kernel.sum() if box_x == 0.0: xshift_min = 0 xshift_max = 1 else: xshift_min = -1 # x_min is always at x=0 xshift_max = int(np.ceil(1 / box_x) + 1) # tile the box to cover [0, 1] if box_y == 0.0: yshift_min = 0 yshift_max = 1 else: yshift_min = -1 # y_min is always at y=0 yshift_max = int(np.ceil(1 / box_y) + 1) # tile the box to cover [0, 1] for x_pos_original, y_pos_original, mass, hsml in zip(x, y, m, h): # loop over periodic copies of this particle for xshift in range(xshift_min, xshift_max): for yshift in range(yshift_min, yshift_max): x_pos = x_pos_original + xshift * box_x y_pos = y_pos_original + yshift * box_y # Calculate the cell that this particle; use the 64 bit version of the # resolution as this is the same type as the positions particle_cell_x = np.int32(np.floor(float_res * x_pos)) particle_cell_y = np.int32(np.floor(float_res * y_pos)) # SWIFT stores hsml as the FWHM. float_mass = np.float64(mass) kernel_width = np.float64(kernel_gamma * hsml) # The number of cells that this kernel spans float_cells_spanned = 1.0 + kernel_width * float_res cells_spanned = np.int32(float_cells_spanned) if ( particle_cell_x + cells_spanned < 0 or particle_cell_x - cells_spanned > maximal_array_index or particle_cell_y + cells_spanned < 0 or particle_cell_y - cells_spanned > maximal_array_index ): # Can happily skip this particle continue # If the particle is too small, then it's very likely that: # a) it does not lie on a boundary # b) evaluating it over this boundary would cause significant errors if kernel_width <= 0.25 * pixel_width: # Here we check for overlaps between this kernel and boundaries. # If they exist, we must use the sub-sampled kernel. dx_left = x_pos - np.float64(particle_cell_x) / float_res dx_right = 1.0 - dx_left dy_down = y_pos - np.float64(particle_cell_y) / float_res dy_up = 1.0 - dy_down overlaps_left = dx_left < kernel_width overlaps_right = dx_right < kernel_width overlaps_down = dy_down < kernel_width overlaps_up = dy_up < kernel_width if not ( overlaps_left or overlaps_right or overlaps_down or overlaps_up ) and ( particle_cell_x >= 0 and particle_cell_x <= maximal_array_index and particle_cell_y >= 0 and particle_cell_y <= maximal_array_index ): # Very simple case - no overlaps. image[particle_cell_x, particle_cell_y] += ( mass * inverse_cell_area ) else: # Use pre-calculated kernel with a basic dither to lay down # overlap for x_dither_cell in range(0, 2 * DITHER_EVALUATIONS): float_x_dither_cell = np.float64(x_dither_cell) pixel_x = np.int32( np.floor( float_res * ( x_pos + ( float_x_dither_cell * float_DITHER_EVALUATIONS_inv - 1.0 ) * kernel_width ) ) ) for y_dither_cell in range(0, 2 * DITHER_EVALUATIONS): float_y_dither_cell = np.float64(y_dither_cell) pixel_y = np.int32( np.floor( float_res * ( y_pos + ( float_y_dither_cell * float_DITHER_EVALUATIONS_inv - 1.0 ) * kernel_width ) ) ) if ( pixel_x >= 0 and pixel_x <= maximal_array_index and pixel_y >= 0 and pixel_y <= maximal_array_index ): image[pixel_x, pixel_y] += ( float_mass * dithered_kernel[x_dither_cell, y_dither_cell] ) else: # The number of times each pixel is subsampled. subsample_factor = max( 1, 2 * np.int32( np.ceil(float_MIN_KERNEL_EVALUATIONS / float_cells_spanned) ), ) float_subsample_factor = np.float64(subsample_factor) inv_float_subsample_factor = 1.0 / float_subsample_factor inv_float_subsample_factor_square = ( inv_float_subsample_factor * inv_float_subsample_factor ) # Now we loop over the square of cells that the kernel lives in for cell_x in range( # Ensure that the lowest x value is 0, otherwise we'll segfault max(0, particle_cell_x - cells_spanned), # Ensure that the highest x value lies within the array bounds, # otherwise we'll segfault (oops). min( particle_cell_x + cells_spanned + 1, maximal_array_index + 1 ), ): float_cell_x = np.float64(cell_x) for cell_y in range( max(0, particle_cell_y - cells_spanned), min( particle_cell_y + cells_spanned + 1, maximal_array_index + 1, ), ): float_cell_y = np.float64(cell_y) # Now we subsample the pixels to get a more accurate # determination of the kernel weight. We take the mean of the # kernel evaluations within a given pixel and apply this as # the true 'kernel evaluation'. kernel_eval = np.float64(0.0) for subsample_x in range(0, subsample_factor): subsample_position_x = ( np.float64(subsample_x) + 0.5 ) * inv_float_subsample_factor distance_x = ( float_cell_x + subsample_position_x ) * pixel_width - x_pos distance_x_2 = distance_x * distance_x for subsample_y in range(0, subsample_factor): subsample_position_y = ( np.float64(subsample_y) + 0.5 ) * inv_float_subsample_factor distance_y = ( float_cell_y + subsample_position_y ) * pixel_width - y_pos distance_y_2 = distance_y * distance_y r = np.sqrt(distance_x_2 + distance_y_2) kernel_eval += kernel(r, kernel_width) image[cell_x, cell_y] += ( float_mass * kernel_eval * inv_float_subsample_factor_square ) return image
[docs] @jit(nopython=True, fastmath=True, parallel=True) def scatter_parallel( x: np.float64, y: np.float64, m: np.float32, h: np.float32, res: int, box_x: np.float64 = 0.0, box_y: np.float64 = 0.0, ) -> np.ndarray: """ Parallel implementation of scatter. Create a weighted scatter plot. Computes contributions from particles with positions (`x`,`y`) with smoothing lengths `h` weighted by quantities `m`. This includes periodic boundary effects. Parameters ---------- x : np.ndarray[np.float64] Array of x-positions of the particles. Must be bounded by [0, 1]. y : np.ndarray[np.float64] Array of y-positions of the particles. Must be bounded by [0, 1]. m : np.ndarray[np.float32] Array of masses (or otherwise weights) of the particles. h : np.ndarray[np.float32] Array of smoothing lengths of the particles. res : int The number of pixels along one axis, i.e. this returns a square of res * res. box_x : np.float64 Box size in x, in the same rescaled length units as x and y. Used for periodic wrapping. box_y : np.float64 Box size in y, in the same rescaled length units as x and y. Used for periodic wrapping. Returns ------- np.ndarray[np.float32, np.float32, np.float32] Pixel grid of quantity. See Also -------- scatter Creates 2D scatter plot from SWIFT data. Notes ----- Explicitly defining the types in this function allows for a 25-50% performance improvement. In our testing, using numpy floats and integers is also an improvement over using the numba ones. Uses 4x the number of sampling points as in scatter_parallel in ``subsampled.py``. """ # Same as scatter, but executes in parallel! This is actually trivial, # we just make NUM_THREADS images and add them together at the end. number_of_particles = x.size core_particles = number_of_particles // NUM_THREADS output = np.zeros((res, res), dtype=np.float64) for thread in prange(NUM_THREADS): # Left edge is easy, just start at 0 and go to 'final' left_edge = thread * core_particles # Right edge is harder in case of left over particles... right_edge = thread + 1 if right_edge == NUM_THREADS: right_edge = number_of_particles else: right_edge *= core_particles output += scatter( x=x[left_edge:right_edge], y=y[left_edge:right_edge], m=m[left_edge:right_edge], h=h[left_edge:right_edge], res=res, box_x=box_x, box_y=box_y, ) return output