Source code for swiftsimio.visualisation.projection_backends.gpu

from math import sqrt, ceil
from numpy import float64, float32, int32, ndarray
from swiftsimio.optional_packages import (
    CUDA_AVAILABLE,
    cuda_jit,
    CudaSupportError,
    cuda,
)

kernel_gamma = float32(1.897367)


[docs]@cuda_jit("float32(float32, float32)", device=True) def kernel(r: float32, H: float32): """ Single precision kernel implementation for swiftsimio. This is the Wendland-C2 kernel as shown in Denhen & Aly (2012) [1]_. Parameters ---------- r : float32 radius used in kernel computation H : float32 kernel width (i.e. radius of compact support for the kernel) Returns ------- float32 Contribution to the density by the particle References ---------- .. [1] Dehnen W., Aly H., 2012, MNRAS, 425, 1068 Notes ----- This is the cuda-compiled version of the kernel, designed for use within the gpu backend. It has no double precision cousin. """ kernel_constant = float32(2.22817109) inverse_H = 1.0 / H ratio = r * inverse_H kernel = 0.0 if ratio < 1.0: one_minus_ratio = 1.0 - ratio one_minus_ratio_2 = one_minus_ratio * one_minus_ratio one_minus_ratio_4 = one_minus_ratio_2 * one_minus_ratio_2 kernel = max(one_minus_ratio_4 * (1.0 + 4.0 * ratio), 0.0) kernel *= kernel_constant * inverse_H * inverse_H return kernel
[docs]@cuda_jit( "void(float64[:], float64[:], float32[:], float32[:], float64, float64, float32[:,:])" ) def scatter_gpu( x: float64, y: float64, m: float32, h: float32, box_x: float64, box_y: float64, img: float32, ): """ Creates a weighted scatter plot Computes contributions to from particles with positions (`x`,`y`) with smoothing lengths `h` weighted by quantities `m`. This includes periodic boundary effects. Parameters ---------- x : np.array[float64] array of x-positions of the particles. Must be bounded by [0, 1]. y : np.array[float64] array of y-positions of the particles. Must be bounded by [0, 1]. m : np.array[float32] array of masses (or otherwise weights) of the particles h : np.array[float32] array of smoothing lengths of the particles box_x: float64 box size in x, in the same rescaled length units as x and y. Used for periodic wrapping. box_y: float64 box size in y, in the same rescaled length units as x and y. Used for periodic wrapping. img : np.array[float32] The output image. Notes ----- Explicitly defining the types in this function allows for a performance improvement. This is the cuda version, and as such can only be ran on systems with a supported GPU. Do not call this where cuda is not available (checks can be performed using ``swiftsimio.optional_packages.CUDA_AVAILABLE``) """ # Output array for our image res = img.shape[0] maximal_array_index = int32(res) - 1 # Change that integer to a float, we know that our x, y are bounded # by [0, 1]. float_res = float32(res) pixel_width = 1.0 / float_res # We need this for combining with the x_pos and y_pos variables. float_res_64 = float64(res) # Pre-calculate this constant for use with the above inverse_cell_area = res * res # get the particle index and the x and y index of its periodic copy i, dx, dy = cuda.grid(3) if i < len(x): # Get the correct particle mass = m[i] hsml = h[i] x_pos = x[i] + (dx - 1.0) * box_x y_pos = y[i] + (dy - 1.0) * box_y # Calculate the cell that this particle; use the 64 bit version of the # resolution as this is the same type as the positions particle_cell_x = int32(float_res_64 * x_pos) particle_cell_y = int32(float_res_64 * y_pos) # SWIFT stores hsml as the FWHM. kernel_width = kernel_gamma * hsml # The number of cells that this kernel spans cells_spanned = int32(1.0 + kernel_width * float_res) if ( particle_cell_x + cells_spanned < 0 or particle_cell_x - cells_spanned > maximal_array_index or particle_cell_y + cells_spanned < 0 or particle_cell_y - cells_spanned > maximal_array_index ): # Can happily skip this particle return if cells_spanned <= 1: # Easygame, gg if ( particle_cell_x >= 0 and particle_cell_x <= maximal_array_index and particle_cell_y >= 0 and particle_cell_y <= maximal_array_index ): cuda.atomic.add( img, (particle_cell_x, particle_cell_y), mass * inverse_cell_area ) else: # Now we loop over the square of cells that the kernel lives in for cell_x in range( # Ensure that the lowest x value is 0, otherwise we'll segfault max(0, particle_cell_x - cells_spanned), # Ensure that the highest x value lies within the array bounds, # otherwise we'll segfault (oops). min(particle_cell_x + cells_spanned + 1, maximal_array_index + 1), ): # The distance in x to our new favourite cell # remember that our x, y are all in a box of [0, 1] # calculate the distance to the cell center distance_x = (float32(cell_x) + 0.5) * pixel_width distance_x -= float32(x_pos) distance_x_2 = distance_x * distance_x for cell_y in range( max(0, particle_cell_y - cells_spanned), min(particle_cell_y + cells_spanned + 1, maximal_array_index + 1), ): distance_y = (float32(cell_y) + 0.5) * pixel_width distance_y -= float32(y_pos) distance_y_2 = distance_y * distance_y r = sqrt(distance_x_2 + distance_y_2) kernel_eval = kernel(r, kernel_width) cuda.atomic.add(img, (cell_x, cell_y), mass * kernel_eval)
[docs]def scatter( x: float64, y: float64, m: float32, h: float32, res: int, box_x: float64 = 0.0, box_y: float64 = 0.0, ) -> ndarray: """ Parallel implementation of scatter Creates a weighted scatter plot. Computes contributions from particles with positions (`x`,`y`) with smoothing lengths `h` weighted by quantities `m`. This includes periodic boundary effects. Parameters ---------- x : np.array[float64] array of x-positions of the particles. Must be bounded by [0, 1]. y : np.array[float64] array of y-positions of the particles. Must be bounded by [0, 1]. m : np.array[float32] array of masses (or otherwise weights) of the particles h : np.array[float32] array of smoothing lengths of the particles res : int the number of pixels along one axis, i.e. this returns a square of res * res. box_x: float64 box size in x, in the same rescaled length units as x and y. Used for periodic wrapping. box_y: float64 box size in y, in the same rescaled length units as x and y. Used for periodic wrapping. Returns ------- np.array[float32, float32, float32] pixel grid of quantity See Also -------- scatter : Creates 2D scatter plot from SWIFT data Notes ----- Explicitly defining the types in this function allows a performance improvement. """ if not CUDA_AVAILABLE or cuda is None: raise CudaSupportError( "Unable to load the CUDA extension to numba. This function " "is only available on systems with supported GPUs." ) output = cuda.device_array((res, res), dtype=float32) output[:] = 0 n_part = len(x) if box_x == 0.0: n_xshift = 1 else: n_xshift = 3 if box_y == 0.0: n_yshift = 1 else: n_yshift = 3 # set up a 3D grid: # the first dimension are the particles # the second and third dimension are the periodic # copies for each particle threads_per_block = (16, 1, 1) blocks_per_grid = ( ceil(n_part / threads_per_block[0]), n_xshift // threads_per_block[1], n_yshift // threads_per_block[2], ) scatter_gpu[blocks_per_grid, threads_per_block](x, y, m, h, box_x, box_y, output) return output.copy_to_host()
scatter_parallel = scatter