Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions benchmark/benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -267,20 +267,20 @@ int main(int argc, char** argv) {
#ifdef R2C
// Get x-pencil information (real)
cudecompPencilInfo_t pinfo_x_r;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_r, &pinfo_x_r, 0, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_r, &pinfo_x_r, 0, nullptr, nullptr));
#endif

// Get x-pencil information (complex)
cudecompPencilInfo_t pinfo_x_c;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_x_c, 0, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_x_c, 0, nullptr, nullptr));

// Get y-pencil information (complex)
cudecompPencilInfo_t pinfo_y_c;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_y_c, 1, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_y_c, 1, nullptr, nullptr));

// Get z-pencil information (complex)
cudecompPencilInfo_t pinfo_z_c;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_z_c, 2, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_z_c, 2, nullptr, nullptr));

// Get workspace size
int64_t num_elements_work_c;
Expand Down Expand Up @@ -508,7 +508,8 @@ int main(int argc, char** argv) {

if (!slab_xyz) {
CHECK_CUDECOMP_EXIT(cudecompTransposeXToY(handle, grid_desc_c, input, output, work_c_d,
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr, 0));
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr,
nullptr, nullptr, 0));
}

if (!slab_xy && !slab_xyz) {
Expand All @@ -530,7 +531,8 @@ int main(int argc, char** argv) {
// For y-z slab case, no need to perform yz transposes or z-axis FFT
if (!slab_yz && !slab_xyz) {
CHECK_CUDECOMP_EXIT(cudecompTransposeYToZ(handle, grid_desc_c, input, output, work_c_d,
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr, 0));
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr,
nullptr, nullptr, 0));
}

if (!slab_yz && !slab_xyz) {
Expand All @@ -545,7 +547,8 @@ int main(int argc, char** argv) {

if (!slab_yz && !slab_xyz) {
CHECK_CUDECOMP_EXIT(cudecompTransposeZToY(handle, grid_desc_c, input, output, work_c_d,
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr, 0));
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr,
nullptr, nullptr, 0));
}

if (!slab_xy && !slab_xyz) {
Expand All @@ -566,7 +569,8 @@ int main(int argc, char** argv) {

if (!slab_xyz) {
CHECK_CUDECOMP_EXIT(cudecompTransposeYToX(handle, grid_desc_c, input, output, work_c_d,
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr, 0));
get_cudecomp_datatype(complex_t(0)), nullptr, nullptr,
nullptr, nullptr, 0));
}
#ifdef R2C
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2r_x, output, output_r, CUFFT_INVERSE));
Expand Down
58 changes: 37 additions & 21 deletions docs/api/f_api.rst

Large diffs are not rendered by default.

38 changes: 21 additions & 17 deletions docs/basic_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,9 @@ First, we can query basic information (i.e. metadata) about the pencil configura
assigned to this process using the :ref:`cudecompGetPencilInfo-ref` function. This function returns a
pencil info structure (:ref:`cudecompPencilInfo_t-ref`) that contains the shape, global lower and upper
index bounds (:code:`lo` and :code:`hi`), size of the pencil, and an :code:`order` array to indicate the memory layout
that will be used (to handle permuted, `axis-contiguous` layouts). Additionally, there is a :code:`halo_extents` data
member that indicates the depth of halos for the pencil, by axis, if the argument was provided
to this function. This data member is a copy of the argument provided to the function
and is stored for convenience.
that will be used (to handle permuted layouts). Additionally, there are :code:`halo_extents` and :code:`padding` data
members that indicate the halo and padding configurations for the pencil, by axis. These data members are copies of the
arguments provided to the function and are stored for convenience.

It should be noted that these metadata structures are provided solely for users to
interpret and access data from the data buffers used as input/output arguments to the different
Expand All @@ -234,7 +233,8 @@ for pencil buffers, nor uses these pencil information structures as input argume
In this example, we apply halo elements to the :math:`X`-pencils only. For the other pencils,
we instead pass a :code:`nullptr` for the :code:`halo_extents` argument, which is equivalent
to setting :code:`halo_extents = [0, 0, 0]` in C/C++. For Fortran, :code:`halo_extents` is optional
and defaults to no halo regions.
and defaults to no halo regions. Similarly, we pass a :code:`nullptr` for the :code:`padding` argument to specify
no padding for all pencils. For Fortran, :code:`padding` is optional, and defaults to no padding.

.. tabs::

Expand All @@ -243,15 +243,15 @@ and defaults to no halo regions.
// Get X-pencil information (with halo elements).
cudecompPencilInfo_t pinfo_x;
int32_t halo_extents_x[3]{1, 1, 1};
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x, nullptr));

// Get Y-pencil information
cudecompPencilInfo_t pinfo_y;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr, nullptr));

// Get Z-pencil information
cudecompPencilInfo_t pinfo_z;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr, nullptr));

.. code-tab:: fortran

Expand Down Expand Up @@ -572,28 +572,30 @@ Transposing the data
--------------------
Now, we can use cuDecomp's transposition routines to transpose our data. In these calls, we are using
the :code:`data_d` array as both input and output (in-place), but you can also use distinct input and output buffers for
out-of-place operations. For the transposes between :math:`Y`- and :math:`Z`-pencils, we can pass
null pointers to the halo extent arguments to the routines to ignore them in C/C++, or leave them unspecified in Fortran.
out-of-place operations. For the transposes between :math:`Y`- and :math:`Z`-pencils, we pass
null pointers to the halo extent arguments to the routines in C/C++, or leave them unspecified in Fortran.
For all transposes, we pass null pointers for the padding arguments in C/C++ to disable padding, or leave
them unspecified in Fortran.

.. tabs::

.. code-tab:: c++

// Transpose from X-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeXToY(handle, grid_desc, data_d, data_d, transpose_work_d,
CUDECOMP_DOUBLE, pinfo_x.halo_extents, nullptr, 0));
CUDECOMP_DOUBLE, pinfo_x.halo_extents, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to Z-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeYToZ(handle, grid_desc, data_d, data_d, transpose_work_d,
CUDECOMP_DOUBLE, nullptr, nullptr, 0));
CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Z-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeZToY(handle, grid_desc, data_d, data_d, transpose_work_d,
CUDECOMP_DOUBLE, nullptr, nullptr, 0));
CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to X-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeYToX(handle, grid_desc, data_d, data_d, transpose_work_d,
CUDECOMP_DOUBLE, nullptr, pinfo_x.halo_extents, 0));
CUDECOMP_DOUBLE, nullptr, pinfo_x.halo_extents, nullptr, nullptr, 0));

.. code-tab:: fortran

Expand All @@ -618,6 +620,8 @@ Updating halo regions
In this example, we have halos for the :math:`X`-pencils only. We can use cuDecomp's halo update
routines to update the halo regions of this pencil in the three domain directions. In this example,
we set the :code:`halo_periods` argument to enable periodic halos along all directions.
We pass a null pointer for the padding argument of each routine in C/C++ to disable padding, or leave
it unspecified in Fortran.

.. tabs::

Expand All @@ -629,17 +633,17 @@ we set the :code:`halo_periods` argument to enable periodic halos along all dire
// Update X-pencil halos in X direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d,
CUDECOMP_DOUBLE, pinfo_x.halo_extents, halo_periods,
0, 0));
0, nullptr, 0));

// Update X-pencil halos in Y direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d,
CUDECOMP_DOUBLE, pinfo_x.halo_extents, halo_periods,
1, 0));
1, nullptr, 0));

// Update X-pencil halos in Z direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d,
CUDECOMP_DOUBLE, pinfo_x.halo_extents, halo_periods,
2, 0));
2, nullptr, 0));

.. code-tab:: fortran

Expand Down
20 changes: 10 additions & 10 deletions examples/cc/basic_usage/basic_usage.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,15 @@ int main(int argc, char** argv) {
// Get X-pencil information (with halo elements)
cudecompPencilInfo_t pinfo_x;
int32_t halo_extents_x[3]{1, 1, 1};
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x, nullptr));

// Get Y-pencil information
cudecompPencilInfo_t pinfo_y;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr, nullptr));

// Get Z-pencil information
cudecompPencilInfo_t pinfo_z;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr, nullptr));

// Allocate pencil memory
int64_t data_num_elements = std::max(std::max(pinfo_x.size, pinfo_y.size), pinfo_z.size);
Expand Down Expand Up @@ -263,19 +263,19 @@ int main(int argc, char** argv) {

// Transpose from X-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeXToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, nullptr, 0));
pinfo_x.halo_extents, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to Z-pencils.
CHECK_CUDECOMP_EXIT(
cudecompTransposeYToZ(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, 0));
cudecompTransposeYToZ(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Z-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(
cudecompTransposeZToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, 0));
cudecompTransposeZToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to X-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeYToX(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE,
nullptr, pinfo_x.halo_extents, 0));
nullptr, pinfo_x.halo_extents, nullptr, nullptr, 0));

// Updating halos

Expand All @@ -284,15 +284,15 @@ int main(int argc, char** argv) {

// Update X-pencil halos in X direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 0, 0));
pinfo_x.halo_extents, halo_periods, 0, nullptr, 0));

// Update X-pencil halos in Y direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 1, 0));
pinfo_x.halo_extents, halo_periods, 1, nullptr, 0));

// Update X-pencil halos in Z direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 2, 0));
pinfo_x.halo_extents, halo_periods, 2, nullptr, 0));

// Cleanup resources
free(data);
Expand Down
20 changes: 10 additions & 10 deletions examples/cc/basic_usage/basic_usage_autotune.cu
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,15 @@ int main(int argc, char** argv) {
// Get X-pencil information (with halo elements).
cudecompPencilInfo_t pinfo_x;
int32_t halo_extents_x[3]{1, 1, 1};
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_x, 0, halo_extents_x, nullptr));

// Get Y-pencil information
cudecompPencilInfo_t pinfo_y;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_y, 1, nullptr, nullptr));

// Get Z-pencil information
cudecompPencilInfo_t pinfo_z;
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr));
CHECK_CUDECOMP_EXIT(cudecompGetPencilInfo(handle, grid_desc, &pinfo_z, 2, nullptr, nullptr));

// Allocate pencil memory
int64_t data_num_elements = std::max(std::max(pinfo_x.size, pinfo_y.size), pinfo_z.size);
Expand Down Expand Up @@ -241,19 +241,19 @@ int main(int argc, char** argv) {

// Transpose from X-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeXToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, nullptr, 0));
pinfo_x.halo_extents, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to Z-pencils.
CHECK_CUDECOMP_EXIT(
cudecompTransposeYToZ(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, 0));
cudecompTransposeYToZ(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Z-pencils to Y-pencils.
CHECK_CUDECOMP_EXIT(
cudecompTransposeZToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, 0));
cudecompTransposeZToY(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE, nullptr, nullptr, nullptr, nullptr, 0));

// Transpose from Y-pencils to X-pencils.
CHECK_CUDECOMP_EXIT(cudecompTransposeYToX(handle, grid_desc, data_d, data_d, transpose_work_d, CUDECOMP_DOUBLE,
nullptr, pinfo_x.halo_extents, 0));
nullptr, pinfo_x.halo_extents, nullptr, nullptr, 0));

// Updating halos

Expand All @@ -262,15 +262,15 @@ int main(int argc, char** argv) {

// Update X-pencil halos in X direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 0, 0));
pinfo_x.halo_extents, halo_periods, 0, nullptr, 0));

// Update X-pencil halos in Y direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 1, 0));
pinfo_x.halo_extents, halo_periods, 1, nullptr, 0));

// Update X-pencil halos in Z direction
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, data_d, halo_work_d, CUDECOMP_DOUBLE,
pinfo_x.halo_extents, halo_periods, 2, 0));
pinfo_x.halo_extents, halo_periods, 2, nullptr, 0));

// Cleanup resources
free(data);
Expand Down
16 changes: 8 additions & 8 deletions examples/cc/taylor_green/tg.cu
Original file line number Diff line number Diff line change
Expand Up @@ -381,16 +381,16 @@ public:
cudecompGridDescCreate(handle, &grid_desc_r, &config, nullptr);

// Get x-pencil information (real)
cudecompGetPencilInfo(handle, grid_desc_r, &pinfo_x_r, 0, nullptr);
cudecompGetPencilInfo(handle, grid_desc_r, &pinfo_x_r, 0, nullptr, nullptr);

// Get x-pencil information (complex)
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_x_c, 0, nullptr);
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_x_c, 0, nullptr, nullptr);

// Get y-pencil information (complex)
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_y_c, 1, nullptr);
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_y_c, 1, nullptr, nullptr);

// Get z-pencil information (complex)
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_z_c, 2, nullptr);
cudecompGetPencilInfo(handle, grid_desc_c, &pinfo_z_c, 2, nullptr, nullptr);

// Get workspace size (only complex workspace required)
int64_t num_elements_work_c;
Expand Down Expand Up @@ -665,10 +665,10 @@ private:
for (int i = 0; i < 3; ++i) {
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_r2c_x, U_r[i], U_c[i], CUFFT_FORWARD));
cudecompTransposeXToY(handle, grid_desc_c, U_c[i], U_c[i], work_c, get_cudecomp_datatype(complex_t(0)), nullptr,
nullptr, 0);
nullptr, nullptr, nullptr, 0);
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2c_y, U_c[i], U_c[i], CUFFT_FORWARD));
cudecompTransposeYToZ(handle, grid_desc_c, U_c[i], U_c[i], work_c, get_cudecomp_datatype(complex_t(0)), nullptr,
nullptr, 0);
nullptr, nullptr, nullptr, 0);
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2c_z, U_c[i], U_c[i], CUFFT_FORWARD));
}
}
Expand All @@ -677,10 +677,10 @@ private:
for (int i = 0; i < 3; ++i) {
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2c_z, U_c[i], U_c[i], CUFFT_INVERSE));
cudecompTransposeZToY(handle, grid_desc_c, U_c[i], U_c[i], work_c, get_cudecomp_datatype(complex_t(0)), nullptr,
nullptr, 0);
nullptr, nullptr, nullptr, 0);
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2c_y, U_c[i], U_c[i], CUFFT_INVERSE));
cudecompTransposeYToX(handle, grid_desc_c, U_c[i], U_c[i], work_c, get_cudecomp_datatype(complex_t(0)), nullptr,
nullptr, 0);
nullptr, nullptr, nullptr, 0);
CHECK_CUFFT_EXIT(cufftXtExec(cufft_plan_c2r_x, U_c[i], U_r[i], CUFFT_INVERSE));
}
}
Expand Down
Loading