Astaroth  2.2
integration.cuh File Reference
#include "user_kernels.h"
Include dependency graph for integration.cuh: (graph image not reproduced in this text version)
This graph shows which files directly or indirectly include this file: (graph image not reproduced in this text version)

Macros

#define REGISTERS_PER_THREAD   (255)
 
#define MAX_REGISTERS_PER_BLOCK   (65536)
 
#define MAX_THREADS_PER_BLOCK   (1024)
 
#define WARP_SIZE   (32)
 
#define make_int3(a, b, c)   (int3) { (int)a, (int)b, (int)c }
 
#define rk3(state_previous, state_current, rate_of_change, dt)   rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)
 
#define WRITE_OUT(handle, value)   (write(buffer.out, handle, idx, value))
 
#define READ(handle)   (read_data(vertexIdx, globalVertexIdx, buffer.in, handle))
 
#define READ_OUT(handle)   (read_out(idx, buffer.out, handle))
 
#define GEN_PREPROCESSED_PARAM_BOILERPLATE   const int3 &vertexIdx, const int3 &globalVertexIdx
 
#define GEN_KERNEL_PARAM_BOILERPLATE   const int3 start, const int3 end, VertexBufferArray buffer
 
#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE()
 
#define GEN_DEVICE_FUNC_HOOK(identifier)
 

Functions

AcResult acKernelAutoOptimizeIntegration (const int3 start, const int3 end, VertexBufferArray vba)
 
AcResult acKernelIntegrateSubstep (const cudaStream_t stream, const int step_number, const int3 start, const int3 end, VertexBufferArray vba)
 
AcResult acKernelDummy (void)
 

Macro Definition Documentation

◆ GEN_DEVICE_FUNC_HOOK

#define GEN_DEVICE_FUNC_HOOK (   identifier)
Value:
template <int step_number> \
AcResult acDeviceKernel_##identifier(const cudaStream_t stream, const int3 start, \
const int3 end, VertexBufferArray vba) \
{ \
\
const dim3 tpb(32, 1, 4); \
\
const int3 n = end - start; \
const dim3 bpg((unsigned int)ceil(n.x / AcReal(tpb.x)), \
(unsigned int)ceil(n.y / AcReal(tpb.y)), \
(unsigned int)ceil(n.z / AcReal(tpb.z))); \
\
identifier<step_number><<<bpg, tpb, 0, stream>>>(start, end, vba); \
ERRCHK_CUDA_KERNEL(); \
\
return AC_SUCCESS; \
}

◆ GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE

#define GEN_KERNEL_BUILTIN_VARIABLES_BOILERPLATE ( )
Value:
const int3 vertexIdx = (int3){threadIdx.x + blockIdx.x * blockDim.x + start.x, \
threadIdx.y + blockIdx.y * blockDim.y + start.y, \
threadIdx.z + blockIdx.z * blockDim.z + start.z}; \
const int3 globalVertexIdx = (int3){d_multigpu_offset.x + vertexIdx.x, \
d_multigpu_offset.y + vertexIdx.y, \
d_multigpu_offset.z + vertexIdx.z}; \
(void)globalVertexIdx; \
if (vertexIdx.x >= end.x || vertexIdx.y >= end.y || vertexIdx.z >= end.z) \
return; \
\
assert(vertexIdx.x < DCONST(AC_nx_max) && vertexIdx.y < DCONST(AC_ny_max) && \
vertexIdx.z < DCONST(AC_nz_max)); \
\
assert(vertexIdx.x >= DCONST(AC_nx_min) && vertexIdx.y >= DCONST(AC_ny_min) && \
vertexIdx.z >= DCONST(AC_nz_min)); \
\
const int idx = IDX(vertexIdx.x, vertexIdx.y, vertexIdx.z);

◆ GEN_KERNEL_PARAM_BOILERPLATE

#define GEN_KERNEL_PARAM_BOILERPLATE   const int3 start, const int3 end, VertexBufferArray buffer

◆ GEN_PREPROCESSED_PARAM_BOILERPLATE

#define GEN_PREPROCESSED_PARAM_BOILERPLATE   const int3 &vertexIdx, const int3 &globalVertexIdx

◆ make_int3

#define make_int3 (   a,
  b,
  c 
)    (int3) { (int)a, (int)b, (int)c }

◆ MAX_REGISTERS_PER_BLOCK

#define MAX_REGISTERS_PER_BLOCK   (65536)

◆ MAX_THREADS_PER_BLOCK

#define MAX_THREADS_PER_BLOCK   (1024)

◆ READ

#define READ (   handle)    (read_data(vertexIdx, globalVertexIdx, buffer.in, handle))

◆ READ_OUT

#define READ_OUT (   handle)    (read_out(idx, buffer.out, handle))

◆ REGISTERS_PER_THREAD

#define REGISTERS_PER_THREAD   (255)

◆ rk3

#define rk3 (   state_previous,
  state_current,
  rate_of_change,
  dt 
)    rk3_integrate<step_number>(state_previous, value(state_current), rate_of_change, dt)

◆ WARP_SIZE

#define WARP_SIZE   (32)

◆ WRITE_OUT

#define WRITE_OUT (   handle,
  value 
)    (write(buffer.out, handle, idx, value))

Function Documentation

◆ acKernelAutoOptimizeIntegration()

AcResult acKernelAutoOptimizeIntegration ( const int3  start,
const int3  end,
VertexBufferArray  vba 
)

◆ acKernelDummy()

AcResult acKernelDummy ( void  )

◆ acKernelIntegrateSubstep()

AcResult acKernelIntegrateSubstep ( const cudaStream_t  stream,
const int  step_number,
const int3  start,
const int3  end,
VertexBufferArray  vba 
)
AcReal
float AcReal
Definition: astaroth.h:38
d_multigpu_offset
#define d_multigpu_offset
Definition: kernels.cu:44
AC_SUCCESS
@ AC_SUCCESS
Definition: astaroth.h:51
VertexBufferArray
Definition: kernels.h:9