Skip to content

Commit

Permalink
Use math expression to enhance performance (#30)
Browse files Browse the repository at this point in the history
* Start

* Change for 1D

* Change for 2D

* Change for 3D

* Replace used variables
  • Loading branch information
huiyuxie authored Sep 4, 2024
1 parent 4e031ca commit 49e25fb
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 182 deletions.
6 changes: 3 additions & 3 deletions src/solvers/dg_1d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_side
side = neighbor_sides[k]

@inbounds begin
boundaries_u[1, j, k] = u[j, size(u, 2), element] * isequal(side, 1) # set to 0 instead of NaN
boundaries_u[2, j, k] = u[j, 1, element] * (1 - isequal(side, 1)) # set to 0 instead of NaN
boundaries_u[1, j, k] = u[j, size(u, 2), element] * (2 - side) # set to 0 instead of NaN
boundaries_u[2, j, k] = u[j, 1, element] * (side - 1) # set to 0 instead of NaN
end
end

Expand All @@ -269,7 +269,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat
orientation = orientations[boundary]

u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, boundary)
u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr
u_inner = (2 - side) * u_ll + (side - 1) * u_rr
x = get_node_coords(node_coordinates, equations, boundary)

# TODO: Improve this part
Expand Down
115 changes: 61 additions & 54 deletions src/solvers/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ function volume_flux_kernel!(volume_flux_arr1, volume_flux_arr2, u, equations::A
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u, 2)^3 && k <= size(u, 4))
j1 = div(j - 1, size(u, 2)^2) + 1
j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1
j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1
u2 = size(u, 2)

j1 = div(j - 1, u2^2) + 1
j2 = div(rem(j - 1, u2^2), u2) + 1
j3 = rem(rem(j - 1, u2^2), u2) + 1

u_node = get_node_vars(u, equations, j1, j2, k)
u_node1 = get_node_vars(u, equations, j3, j2, k)
Expand Down Expand Up @@ -88,9 +90,11 @@ function symmetric_noncons_flux_kernel!(symmetric_flux_arr1, symmetric_flux_arr2
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(u, 2)^3 && k <= size(u, 4))
j1 = div(j - 1, size(u, 2)^2) + 1
j2 = div(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1
j3 = rem(rem(j - 1, size(u, 2)^2), size(u, 2)) + 1
u2 = size(u, 2)

j1 = div(j - 1, u2^2) + 1
j2 = div(rem(j - 1, u2^2), u2) + 1
j3 = rem(rem(j - 1, u2^2), u2) + 1

u_node = get_node_vars(u, equations, j1, j2, k)
u_node1 = get_node_vars(u, equations, j3, j2, k)
Expand Down Expand Up @@ -176,23 +180,24 @@ function prolong_interfaces_kernel!(interfaces_u, u, neighbor_ids, orientations,
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(interfaces_u, 2) * size(interfaces_u, 3) && k <= size(interfaces_u, 4))
j1 = div(j - 1, size(interfaces_u, 3)) + 1
j2 = rem(j - 1, size(interfaces_u, 3)) + 1
# size(interfaces_u, 3) == size(u, 2)
u2 = size(u, 2)

j1 = div(j - 1, u2) + 1
j2 = rem(j - 1, u2) + 1

orientation = orientations[k]
left_element = neighbor_ids[1, k]
right_element = neighbor_ids[2, k]

u2 = size(u, 2)

@inbounds begin
interfaces_u[1, j1, j2, k] = u[j1,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2,
isequal(orientation, 1) * j2 + isequal(orientation, 2) * u2,
(2 - orientation) * u2 + (orientation - 1) * j2,
(2 - orientation) * j2 + (orientation - 1) * u2,
left_element]
interfaces_u[2, j1, j2, k] = u[j1,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2,
isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1,
(2 - orientation) + (orientation - 1) * j2,
(2 - orientation) * j2 + (orientation - 1),
right_element]
end
end
Expand Down Expand Up @@ -310,24 +315,25 @@ function prolong_boundaries_kernel!(boundaries_u, u, neighbor_ids, neighbor_side
k = (blockIdx().y - 1) * blockDim().y + threadIdx().y

if (j <= size(boundaries_u, 2) * size(boundaries_u, 3) && k <= size(boundaries_u, 4))
j1 = div(j - 1, size(boundaries_u, 3)) + 1
j2 = rem(j - 1, size(boundaries_u, 3)) + 1
# size(boundaries_u, 3) == size(u, 2)
u2 = size(u, 2)

j1 = div(j - 1, u2) + 1
j2 = rem(j - 1, u2) + 1

element = neighbor_ids[k]
side = neighbor_sides[k]
orientation = orientations[k]

u2 = size(u, 2)

@inbounds begin
boundaries_u[1, j1, j2, k] = u[j1,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * j2,
isequal(orientation, 1) * j2 + isequal(orientation, 2) * u2,
element] * isequal(side, 1) # Set to 0 instead of NaN
(2 - orientation) * u2 + (orientation - 1) * j2,
(2 - orientation) * j2 + (orientation - 1) * u2,
element] * (2 - side) # Set to 0 instead of NaN
boundaries_u[2, j1, j2, k] = u[j1,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * j2,
isequal(orientation, 1) * j2 + isequal(orientation, 2) * 1,
element] * (1 - isequal(side, 1)) # Set to 0 instead of NaN
(2 - orientation) + (orientation - 1) * j2,
(2 - orientation) * j2 + (orientation - 1),
element] * (side - 1) # Set to 0 instead of NaN
end
end

Expand All @@ -352,7 +358,7 @@ function boundary_flux_kernel!(surface_flux_values, boundaries_u, node_coordinat
orientation = orientations[boundary]

u_ll, u_rr = get_surface_node_vars(boundaries_u, equations, j, boundary)
u_inner = isequal(side, 1) * u_ll + (1 - isequal(side, 1)) * u_rr
u_inner = (2 - side) * u_ll + (side - 1) * u_rr
x = get_node_coords(node_coordinates, equations, j, boundary)

# TODO: Improve this part
Expand Down Expand Up @@ -402,24 +408,24 @@ function prolong_mortars_small2small_kernel!(u_upper, u_lower, u, neighbor_ids,

@inbounds begin
u_upper[2, i, j, k] = u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * j,
isequal(orientation, 1) * j + isequal(orientation, 2) * 1,
upper_element] * isequal(large_side, 1)
(2 - orientation) + (orientation - 1) * j,
(2 - orientation) * j + (orientation - 1),
upper_element] * (2 - large_side)

u_lower[2, i, j, k] = u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * j,
isequal(orientation, 1) * j + isequal(orientation, 2) * 1,
lower_element] * isequal(large_side, 1)
(2 - orientation) + (orientation - 1) * j,
(2 - orientation) * j + (orientation - 1),
lower_element] * (2 - large_side)

u_upper[1, i, j, k] = u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * j,
isequal(orientation, 1) * j + isequal(orientation, 2) * u2,
upper_element] * isequal(large_side, 2)
(2 - orientation) * u2 + (orientation - 1) * j,
(2 - orientation) * j + (orientation - 1) * u2,
upper_element] * (large_side - 1)

u_lower[1, i, j, k] = u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * j,
isequal(orientation, 1) * j + isequal(orientation, 2) * u2,
lower_element] * isequal(large_side, 2)
(2 - orientation) * u2 + (orientation - 1) * j,
(2 - orientation) * j + (orientation - 1) * u2,
lower_element] * (large_side - 1)
end
end

Expand All @@ -445,27 +451,27 @@ function prolong_mortars_large2small_kernel!(u_upper, u_lower, u, forward_upper,
for jj in axes(forward_upper, 2)
u_upper[leftright, i, j, k] += forward_upper[j, jj] *
u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * u2,
large_element] * isequal(large_side, 1)
(2 - orientation) * u2 + (orientation - 1) * jj,
(2 - orientation) * jj + (orientation - 1) * u2,
large_element] * (2 - large_side)
u_lower[leftright, i, j, k] += forward_lower[j, jj] *
u[i,
isequal(orientation, 1) * u2 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * u2,
large_element] * isequal(large_side, 1)
(2 - orientation) * u2 + (orientation - 1) * jj,
(2 - orientation) * jj + (orientation - 1) * u2,
large_element] * (2 - large_side)
end

for jj in axes(forward_lower, 2)
u_upper[leftright, i, j, k] += forward_upper[j, jj] *
u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * 1,
large_element] * isequal(large_side, 2)
(2 - orientation) + (orientation - 1) * jj,
(2 - orientation) * jj + (orientation - 1),
large_element] * (large_side - 1)
u_lower[leftright, i, j, k] += forward_lower[j, jj] *
u[i,
isequal(orientation, 1) * 1 + isequal(orientation, 2) * jj,
isequal(orientation, 1) * jj + isequal(orientation, 2) * 1,
large_element] * isequal(large_side, 2)
(2 - orientation) + (orientation - 1) * jj,
(2 - orientation) * jj + (orientation - 1),
large_element] * (large_side - 1)
end
end
end
Expand Down Expand Up @@ -557,15 +563,16 @@ function surface_integral_kernel!(du, factor_arr, surface_flux_values,
k = (blockIdx().z - 1) * blockDim().z + threadIdx().z

if (i <= size(du, 1) && j <= size(du, 2)^2 && k <= size(du, 4))
j1 = div(j - 1, size(du, 2)) + 1
j2 = rem(j - 1, size(du, 2)) + 1
u2 = size(du, 2)

j1 = div(j - 1, u2) + 1
j2 = rem(j - 1, u2) + 1

@inbounds begin
du[i, j1, j2, k] -= (surface_flux_values[i, j2, 1, k] * isequal(j1, 1) +
surface_flux_values[i, j1, 3, k] * isequal(j2, 1)) * factor_arr[1]
du[i, j1, j2, k] += (surface_flux_values[i, j2, 2, k] * isequal(j1, size(du, 2)) +
surface_flux_values[i, j1, 4, k] * isequal(j2, size(du, 2))) *
factor_arr[2]
du[i, j1, j2, k] += (surface_flux_values[i, j2, 2, k] * isequal(j1, u2) +
surface_flux_values[i, j1, 4, k] * isequal(j2, u2)) * factor_arr[2]
end
end

Expand Down
Loading

0 comments on commit 49e25fb

Please sign in to comment.