From 49e25fb8f737e56b86a62e9a4ae835cbaf5aec55 Mon Sep 17 00:00:00 2001 From: Huiyu Xie Date: Tue, 3 Sep 2024 18:50:37 -1000 Subject: [PATCH] Use math expression to enhance performance (#30) * Start * Change for 1D * Change for 2D * Change for 3D * Replace used variables --- src/solvers/dg_1d.jl | 6 +- src/solvers/dg_2d.jl | 115 ++++++------ src/solvers/dg_3d.jl | 230 ++++++++++++++---------- test/runtests.jl | 5 + test/test_adevction_mortar.jl | 5 - test/test_advection_basic.jl | 5 - test/test_euler_ec.jl | 5 - test/test_euler_source_terms.jl | 5 - test/test_hypdiff_nonperiodic.jl | 5 - test/test_shallowwater_well_balanced.jl | 5 - 10 files changed, 204 insertions(+), 182 deletions(-) diff --git a/src/solvers/dg_1d.jl b/src/solvers/dg_1d.jl index 1938552..233d8cb 100644 --- a/src/solvers/dg_1d.jl +++ b/src/solvers/dg_1d.jl @@ -245,8 +245,8 @@ function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_side side = neighbor_sides[k] @inbounds begin - boundaries_u[1, j, k] = u[j, size(u, 2), element] * isequal(side, 1) # set to 0 instead of NaN - boundaries_u[2, j, k] = u[j, 1, element] * (1 - isequal(side, 1)) # set to 0 instead of NaN + boundaries_u[1, j, k] = u[j, size(u, 2), element] * (2 - side) # set to 0 instead of NaN + boundaries_u[2, j, k] = u[j, 1, element] * (side - 1) # set to 0 instead of NaN end end @@ -269,7 +269,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat orientation = orientations[boundary] u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, boundary) - u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr + u_inner = (2 - side) * u_ll + (side - 1) * u_rr x = get_node_coords(node_coordinates, equations, boundary) # TODO: Improve this part diff --git a/src/solvers/dg_2d.jl b/src/solvers/dg_2d.jl index 8f318e0..ca837f8 100644 --- a/src/solvers/dg_2d.jl +++ b/src/solvers/dg_2d.jl @@ -57,9 +57,11 @@ function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, u, equations::A k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(u, 2)^3 && k <= size(u, 4)) - j1 = div(j - 1, size(u, 2)^2) + 1 - j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 - j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 u_node = get_node_vars(u, equations, j1, j2, k) u_node1 = get_node_vars(u, equations, j3, j2, k) @@ -88,9 +90,11 @@ function symmetric_noncons_flux_kernel!(symmetric_flux_arr1, symmetric_flux_arr2 k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(u, 2)^3 && k <= size(u, 4)) - j1 = div(j - 1, size(u, 2)^2) + 1 - j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 - j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 u_node = get_node_vars(u, equations, j1, j2, k) u_node1 = get_node_vars(u, equations, j3, j2, k) @@ -176,23 +180,24 @@ function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations, k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(interfaces_u, 2) * size(interfaces_u, 3) && k <= size(interfaces_u, 4)) - j1 = div(j - 1, size(interfaces_u, 3)) + 1 - j2 = rem(j - 1, size(interfaces_u, 3)) + 1 + # size(interfaces_u, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2) + 1 + j2 = rem(j - 1, u2) + 1 orientation = orientations[k] left_element = neighbor_ids[1, k] right_element = neighbor_ids[2, k] - u2 = size(u, 2) - @inbounds begin interfaces_u[1, j1, j2, k] = u[j1, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * u2, + (2 - orientation) * u2 + (orientation - 1) * j2, + (2 - orientation) * j2 + (orientation - 1) * u2, left_element] interfaces_u[2, j1, j2, k] = u[j1, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1, + (2 - orientation) + (orientation - 1) * j2, + (2 - orientation) * j2 + (orientation - 1), right_element] end end @@ -310,24 +315,25 @@ function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_side k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(boundaries_u, 2) * size(boundaries_u, 3) && k <= size(boundaries_u, 4)) - j1 = div(j - 1, size(boundaries_u, 3)) + 1 - j2 = rem(j - 1, size(boundaries_u, 3)) + 1 + # size(boundaries_u, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2) + 1 + j2 = rem(j - 1, u2) + 1 element = neighbor_ids[k] side = neighbor_sides[k] orientation = orientations[k] - u2 = size(u, 2) - @inbounds begin boundaries_u[1, j1, j2, k] = u[j1, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * u2, - element] * isequal(side, 1) # Set to 0 instead of NaN + (2 - orientation) * u2 + (orientation - 1) * j2, + (2 - orientation) * j2 + (orientation - 1) * u2, + element] * (2 - side) # Set to 0 instead of NaN boundaries_u[2, j1, j2, k] = u[j1, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1, - element] * (1 - isequal(side, 1)) # Set to 0 instead of NaN + (2 - orientation) + (orientation - 1) * j2, + (2 - orientation) * j2 + (orientation - 1), + element] * (side - 1) # Set to 0 instead of NaN end end @@ -352,7 +358,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat orientation = orientations[boundary] u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, j, boundary) - u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr + u_inner = (2 - side) * u_ll + (side - 1) * u_rr x = get_node_coords(node_coordinates, equations, j, boundary) # TODO: Improve this part @@ -402,24 +408,24 @@ function prolong_mortars_small2small_kernel!(u_upper, u_lower, u, neighbor_ids, @inbounds begin u_upper[2, i, j, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j, - isequal(orientation, 1) * j + isequal(orientation, 2) * 1, - upper_element] * isequal(large_side, 1) + (2 - orientation) + (orientation - 1) * j, + (2 - orientation) * j + (orientation - 1), + upper_element] * (2 - large_side) u_lower[2, i, j, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j, - isequal(orientation, 1) * j + isequal(orientation, 2) * 1, - lower_element] * isequal(large_side, 1) + (2 - orientation) + (orientation - 1) * j, + (2 - orientation) * j + (orientation - 1), + lower_element] * (2 - large_side) u_upper[1, i, j, k] = u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * j, - isequal(orientation, 1) * j + isequal(orientation, 2) * u2, - upper_element] * isequal(large_side, 2) + (2 - orientation) * u2 + (orientation - 1) * j, + (2 - orientation) * j + (orientation - 1) * u2, + upper_element] * (large_side - 1) u_lower[1, i, j, k] = u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * j, - isequal(orientation, 1) * j + isequal(orientation, 2) * u2, - lower_element] * isequal(large_side, 2) + (2 - orientation) * u2 + (orientation - 1) * j, + (2 - orientation) * j + (orientation - 1) * u2, + lower_element] * (large_side - 1) end end @@ -445,27 +451,27 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper, for jj in axes(forward_upper, 2) u_upper[leftright, i, j, k] += forward_upper[j, jj] * u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj, - isequal(orientation, 1) * jj + isequal(orientation, 2) * u2, - large_element] * isequal(large_side, 1) + (2 - orientation) * u2 + (orientation - 1) * jj, + (2 - orientation) * jj + (orientation - 1) * u2, + large_element] * (2 - large_side) u_lower[leftright, i, j, k] += forward_lower[j, jj] * u[i, - isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj, - isequal(orientation, 1) * jj + isequal(orientation, 2) * u2, - large_element] * isequal(large_side, 1) + (2 - orientation) * u2 + (orientation - 1) * jj, + (2 - orientation) * jj + (orientation - 1) * u2, + large_element] * (2 - large_side) end for jj in axes(forward_lower, 2) u_upper[leftright, i, j, k] += forward_upper[j, jj] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj, - isequal(orientation, 1) * jj + isequal(orientation, 2) * 1, - large_element] * isequal(large_side, 2) + (2 - orientation) + (orientation - 1) * jj, + (2 - orientation) * jj + (orientation - 1), + large_element] * (large_side - 1) u_lower[leftright, i, j, k] += forward_lower[j, jj] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj, - isequal(orientation, 1) * jj + isequal(orientation, 2) * 1, - large_element] * isequal(large_side, 2) + (2 - orientation) + (orientation - 1) * jj, + (2 - orientation) * jj + (orientation - 1), + large_element] * (large_side - 1) end end end @@ -557,15 +563,16 @@ function surface_integral_kernel!(du, factor_arr, surface_flux_values, k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(du, 1) && j <= size(du, 2)^2 && k <= size(du, 4)) - j1 = div(j - 1, size(du, 2)) + 1 - j2 = rem(j - 1, size(du, 2)) + 1 + u2 = size(du, 2) + + j1 = div(j - 1, u2) + 1 + j2 = rem(j - 1, u2) + 1 @inbounds begin du[i, j1, j2, k] -= (surface_flux_values[i, j2, 1, k] * isequal(j1, 1) + surface_flux_values[i, j1, 3, k] * isequal(j2, 1)) * factor_arr[1] - du[i, j1, j2, k] += (surface_flux_values[i, j2, 2, k] * isequal(j1, size(du, 2)) + - surface_flux_values[i, j1, 4, k] * isequal(j2, size(du, 2))) * - factor_arr[2] + du[i, j1, j2, k] += (surface_flux_values[i, j2, 2, k] * isequal(j1, u2) + + surface_flux_values[i, j1, 4, k] * isequal(j2, u2)) * factor_arr[2] end end diff --git a/src/solvers/dg_3d.jl b/src/solvers/dg_3d.jl index 37546cc..12f1c9b 100644 --- a/src/solvers/dg_3d.jl +++ b/src/solvers/dg_3d.jl @@ -11,9 +11,11 @@ function flux_kernel!(flux_arr1, flux_arr2, flux_arr3, u, equations::AbstractEqu k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(u, 2)^3 && k <= size(u, 5)) - j1 = div(j - 1, size(u, 2)^2) + 1 - j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 - j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 u_node = get_node_vars(u, equations, j1, j2, j3, k) @@ -40,9 +42,12 @@ function weak_form_kernel!(du, derivative_dhat, flux_arr1, flux_arr2, flux_arr3) k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(du, 1) && j <= size(du, 2)^3 && k <= size(du, 5)) - j1 = div(j - 1, size(du, 2)^2) + 1 - j2 = div(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 - j3 = rem(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 + # size(du, 2) == size(u, 2) + u2 = size(du, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 @inbounds begin for ii in axes(du, 2) @@ -63,10 +68,12 @@ function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, volume_flux_arr k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(u, 2)^4 && k <= size(u, 5)) - j1 = div(j - 1, size(u, 2)^3) + 1 - j2 = div(rem(j - 1, size(u, 2)^3), size(u, 2)^2) + 1 - j3 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1 - j4 = rem(j - 1, size(u, 2)) + 1 + u2 = size(u, 2) + + j1 = div(j - 1, u2^3) + 1 + j2 = div(rem(j - 1, u2^3), u2^2) + 1 + j3 = div(rem(j - 1, u2^2), u2) + 1 + j4 = rem(j - 1, u2) + 1 u_node = get_node_vars(u, equations, j1, j2, j3, k) u_node1 = get_node_vars(u, equations, j4, j2, j3, k) @@ -97,9 +104,12 @@ function volume_integral_kernel!(du, derivative_split, volume_flux_arr1, volume_ k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(du, 1) && j <= size(du, 2)^3 && k <= size(du, 5)) - j1 = div(j - 1, size(du, 2)^2) + 1 - j2 = div(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 - j3 = rem(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 + # size(du, 2) == size(u, 2) + u2 = size(du, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 @inbounds begin for ii in axes(du, 2) @@ -123,16 +133,17 @@ function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations, k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(interfaces_u, 2) * size(interfaces_u, 3)^2 && k <= size(interfaces_u, 5)) - j1 = div(j - 1, size(interfaces_u, 3)^2) + 1 - j2 = div(rem(j - 1, size(interfaces_u, 3)^2), size(interfaces_u, 3)) + 1 - j3 = rem(rem(j - 1, size(interfaces_u, 3)^2), size(interfaces_u, 3)) + 1 + # size(interfaces_u, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 orientation = orientations[k] left_element = neighbor_ids[1, k] right_element = neighbor_ids[2, k] - u2 = size(u, 2) - @inbounds begin interfaces_u[1, j1, j2, j3, k] = u[j1, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, @@ -140,9 +151,11 @@ function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations, isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, 3) * u2, left_element] interfaces_u[2, j1, j2, j3, k] = u[j1, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j3, - isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, 3) * 1, + isequal(orientation, 1) + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, + 2) + isequal(orientation, 3) * j3, + isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, + 3), right_element] end end @@ -210,27 +223,30 @@ function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_side k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(boundaries_u, 2) * size(boundaries_u, 3)^2 && k <= size(boundaries_u, 5)) - j1 = div(j - 1, size(boundaries_u, 3)^2) + 1 - j2 = div(rem(j - 1, size(boundaries_u, 3)^2), size(boundaries_u, 3)) + 1 - j3 = rem(rem(j - 1, size(boundaries_u, 3)^2), size(boundaries_u, 3)) + 1 + # size(boundaries_u, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 element = neighbor_ids[k] side = neighbor_sides[k] orientation = orientations[k] - u2 = size(u, 2) - @inbounds begin boundaries_u[1, j1, j2, j3, k] = u[j1, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j3, isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, 3) * u2, - element] * isequal(side, 1) # Set to 0 instead of NaN + element] * (2 - side) # Set to 0 instead of NaN boundaries_u[2, j1, j2, j3, k] = u[j1, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j3, - isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, 3) * 1, - element] * (1 - isequal(side, 1)) # Set to 0 instead of NaN + isequal(orientation, 1) + isequal(orientation, 2) * j2 + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, + 2) + isequal(orientation, 3) * j3, + isequal(orientation, 1) * j3 + isequal(orientation, 2) * j3 + isequal(orientation, + 3), + element] * (side - 1) # Set to 0 instead of NaN end end @@ -259,7 +275,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat orientation = orientations[boundary] u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, j1, j2, boundary) - u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr + u_inner = (2 - side) * u_ll + (side - 1) * u_rr x = get_node_coords(node_coordinates, equations, j1, j2, boundary) # TODO: Improve this part @@ -308,8 +324,11 @@ function prolong_mortars_small2small_kernel!(u_upper_left, u_upper_right, u_lowe k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(u_upper_left, 2) && j <= size(u_upper_left, 3)^2 && k <= size(u_upper_left, 5)) - j1 = div(j - 1, size(u_upper_left, 3)) + 1 - j2 = rem(j - 1, size(u_upper_left, 3)) + 1 + # size(u_upper_left, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2) + 1 + j2 = rem(j - 1, u2) + 1 large_side = large_sides[k] orientation = orientations[k] @@ -319,56 +338,62 @@ function prolong_mortars_small2small_kernel!(u_upper_left, u_upper_right, u_lowe upper_left_element = neighbor_ids[3, k] upper_right_element = neighbor_ids[4, k] - u2 = size(u, 2) - @inbounds begin u_upper_left[2, i, j1, j2, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, - isequal(orientation, 1) * j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - upper_left_element] * isequal(large_side, 1) + isequal(orientation, 1) + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, + isequal(orientation, 1) * j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + upper_left_element] * (2 - large_side) u_upper_right[2, i, j1, j2, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, - isequal(orientation, 1) * j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - upper_right_element] * isequal(large_side, 1) + isequal(orientation, 1) + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, + isequal(orientation, 1) * j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + upper_right_element] * (2 - large_side) u_lower_left[2, i, j1, j2, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, - isequal(orientation, 1) * j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - lower_left_element] * isequal(large_side, 1) + isequal(orientation, 1) + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, + isequal(orientation, 1) * j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + lower_left_element] * (2 - large_side) u_lower_right[2, i, j1, j2, k] = u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, - isequal(orientation, 1) * j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - lower_right_element] * isequal(large_side, 1) + isequal(orientation, 1) + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, + isequal(orientation, 1) * j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + lower_right_element] * (2 - large_side) u_upper_left[1, i, j1, j2, k] = u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, isequal(orientation, 1) * j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - upper_left_element] * isequal(large_side, 2) + upper_left_element] * (large_side - 1) u_upper_right[1, i, j1, j2, k] = u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, isequal(orientation, 1) * j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - upper_right_element] * isequal(large_side, 2) + upper_right_element] * (large_side - 1) u_lower_left[1, i, j1, j2, k] = u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, isequal(orientation, 1) * j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - lower_left_element] * isequal(large_side, 2) + lower_left_element] * (large_side - 1) u_lower_right[1, i, j1, j2, k] = u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1 + isequal(orientation, 3) * j1, isequal(orientation, 1) * j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - lower_right_element] * isequal(large_side, 2) + lower_right_element] * (large_side - 1) end end @@ -385,15 +410,17 @@ function prolong_mortars_large2small_kernel!(u_upper_left, u_upper_right, u_lowe k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(u_upper_left, 2) && j <= size(u_upper_left, 3)^2 && k <= size(u_upper_left, 5)) - j1 = div(j - 1, size(u_upper_left, 3)) + 1 - j2 = rem(j - 1, size(u_upper_left, 3)) + 1 + # size(u_upper_left, 3) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2) + 1 + j2 = rem(j - 1, u2) + 1 large_side = large_sides[k] orientation = orientations[k] large_element = neighbor_ids[5, k] leftright = large_side - u2 = size(u, 2) @inbounds begin for j1j1 in axes(forward_lower, 2) @@ -402,62 +429,66 @@ function prolong_mortars_large2small_kernel!(u_upper_left, u_upper_right, u_lowe isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - large_element] * isequal(large_side, 1) + large_element] * (2 - large_side) tmp_upper_right[leftright, i, j1, j2, k] += forward_upper[j1, j1j1] * u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - large_element] * - isequal(large_side, 1) + large_element] * (2 - large_side) tmp_lower_left[leftright, i, j1, j2, k] += forward_lower[j1, j1j1] * u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - large_element] * isequal(large_side, 1) + large_element] * (2 - large_side) tmp_lower_right[leftright, i, j1, j2, k] += forward_upper[j1, j1j1] * u[i, isequal(orientation, 1) * u2 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * u2 + isequal(orientation, 3) * j2, isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * u2, - large_element] * - isequal(large_side, 1) + large_element] * (2 - large_side) end for j1j1 in axes(forward_lower, 2) tmp_upper_left[leftright, i, j1, j2, k] += forward_lower[j1, j1j1] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, - isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - large_element] * isequal(large_side, 2) + isequal(orientation, 1) + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, + isequal(orientation, 1) * j1j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + large_element] * (large_side - 1) tmp_upper_right[leftright, i, j1, j2, k] += forward_upper[j1, j1j1] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, - isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - large_element] * - isequal(large_side, 2) + isequal(orientation, 1) + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, + isequal(orientation, 1) * j1j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + large_element] * (large_side - 1) tmp_lower_left[leftright, i, j1, j2, k] += forward_lower[j1, j1j1] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, - isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - large_element] * isequal(large_side, 2) + isequal(orientation, 1) + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, + isequal(orientation, 1) * j1j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + large_element] * (large_side - 1) tmp_lower_right[leftright, i, j1, j2, k] += forward_upper[j1, j1j1] * u[i, - isequal(orientation, 1) * 1 + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, - isequal(orientation, 1) * j1j1 + isequal(orientation, 2) * 1 + isequal(orientation, 3) * j2, - isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, 3) * 1, - large_element] * - isequal(large_side, 2) + isequal(orientation, 1) + isequal(orientation, 2) * j1j1 + isequal(orientation, 3) * j1j1, + isequal(orientation, 1) * j1j1 + isequal(orientation, + 2) + isequal(orientation, 3) * j2, + isequal(orientation, 1) * j2 + isequal(orientation, 2) * j2 + isequal(orientation, + 3), + large_element] * (large_side - 1) end for j2j2 in axes(forward_upper, 2) @@ -639,9 +670,12 @@ function surface_integral_kernel!(du, factor_arr, surface_flux_values, k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(du, 1) && j <= size(du, 2)^3 && k <= size(du, 5)) - j1 = div(j - 1, size(du, 2)^2) + 1 - j2 = div(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 - j3 = rem(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 + # size(du, 2) == size(u, 2) + u2 = size(du, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 @inbounds begin du[i, j1, j2, j3, k] -= (surface_flux_values[i, j2, j3, 1, k] * isequal(j1, 1) + @@ -649,10 +683,10 @@ function surface_integral_kernel!(du, factor_arr, surface_flux_values, surface_flux_values[i, j1, j2, 5, k] * isequal(j3, 1)) * factor_arr[1] du[i, j1, j2, j3, k] += (surface_flux_values[i, j2, j3, 2, k] * - isequal(j1, size(du, 2)) + + isequal(j1, u2) + surface_flux_values[i, j1, j3, 4, k] * - isequal(j2, size(du, 2)) + - surface_flux_values[i, j1, j2, 6, k] * isequal(j3, size(du, 2))) * + isequal(j2, u2) + + surface_flux_values[i, j1, j2, 6, k] * isequal(j3, u2)) * factor_arr[2] end end @@ -667,9 +701,12 @@ function jacobian_kernel!(du, inverse_jacobian, equations::AbstractEquations{3}) k = (blockIdx().z - 1) * blockDim().z + threadIdx().z if (i <= size(du, 1) && j <= size(du, 2)^3 && k <= size(du, 5)) - j1 = div(j - 1, size(du, 2)^2) + 1 - j2 = div(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 - j3 = rem(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 + # size(du, 2) == size(u, 2) + u2 = size(du, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 @inbounds du[i, j1, j2, j3, k] *= -inverse_jacobian[k] end @@ -684,9 +721,12 @@ function source_terms_kernel!(du, u, node_coordinates, t, equations::AbstractEqu k = (blockIdx().y - 1) * blockDim().y + threadIdx().y if (j <= size(du, 2)^3 && k <= size(du, 5)) - j1 = div(j - 1, size(du, 2)^2) + 1 - j2 = div(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 - j3 = rem(rem(j - 1, size(du, 2)^2), size(du, 2)) + 1 + # size(du, 2) == size(u, 2) + u2 = size(u, 2) + + j1 = div(j - 1, u2^2) + 1 + j2 = div(rem(j - 1, u2^2), u2) + 1 + j3 = rem(rem(j - 1, u2^2), u2) + 1 u_local = get_node_vars(u, equations, j1, j2, j3, k) x_local = get_node_coords(node_coordinates, equations, j1, j2, j3, k) diff --git a/test/runtests.jl b/test/runtests.jl index 685f497..d6b57b4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,9 @@ using TrixiGPU using Test +# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` +# with the default mode to validate the precision by comparing the results from GPU kernels and +# CPU kernels, which corresponds to requiring equality of about half of the significant digits +# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). + @testset "TrixiGPU.jl" begin end diff --git a/test/test_adevction_mortar.jl b/test/test_adevction_mortar.jl index affb435..b160891 100644 --- a/test/test_adevction_mortar.jl +++ b/test/test_adevction_mortar.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Linear Advection Mortar" begin @testset "Linear Advection 2D" begin diff --git a/test/test_advection_basic.jl b/test/test_advection_basic.jl index bde97bf..68e8c24 100644 --- a/test/test_advection_basic.jl +++ b/test/test_advection_basic.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Linear Advection Basic" begin @testset "Linear Advection 1D" begin diff --git a/test/test_euler_ec.jl b/test/test_euler_ec.jl index 5ebb761..0c132a5 100644 --- a/test/test_euler_ec.jl +++ b/test/test_euler_ec.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Compressible Euler Flux Differencing" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_euler_source_terms.jl b/test/test_euler_source_terms.jl index 6f410ea..d872981 100644 --- a/test/test_euler_source_terms.jl +++ b/test/test_euler_source_terms.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Compressible Euler Source Terms" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_hypdiff_nonperiodic.jl b/test/test_hypdiff_nonperiodic.jl index d6263ab..ac6c1b6 100644 --- a/test/test_hypdiff_nonperiodic.jl +++ b/test/test_hypdiff_nonperiodic.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Hyperbolic Diffusion Boundary Conditions" begin @testset "Compressible Euler 1D" begin diff --git a/test/test_shallowwater_well_balanced.jl b/test/test_shallowwater_well_balanced.jl index d90b3d7..c155526 100644 --- a/test/test_shallowwater_well_balanced.jl +++ b/test/test_shallowwater_well_balanced.jl @@ -8,11 +8,6 @@ using Test, CUDA outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -# Note that it is complicated to get tight error bounds for GPU kernels, here we use `isapprox` -# with the default mode to validate the precision by comparing the results from GPU kernels and -# CPU kernels, which corresponds to requiring equality of about half of the significant digits -# (see https://docs.julialang.org/en/v1/base/math/#Base.isapprox). - # Test precision of the semidiscretization process @testset "Test Shallow Water Well Balanced" begin @testset "Shallow Water 1D" begin