Skip to content

Commit

Permalink
Merge #715
Browse files Browse the repository at this point in the history
715: Adds Thomas algorithm for solving a tri-diagonal system of linear equations. r=charleskawczynski a=sriharshakandala



# PULL REQUEST

## Purpose and Content
Adds Thomas algorithm for solving a tri-diagonal system of linear equations. This replaces the pre-existing `lu!` solver in `linsolve`.

## Benefits and Risks
The Thomas algorithm computes the solution in-place and eliminates memory allocations noticed in the pre-existing `lu!` solver.

## Linked Issues
(Provide references to any link issues. Use closes #issuenum to automatically close an open issue)
- Fixes #648 
- Closes #648 

## PR Checklist
- [x] This PR has a corresponding issue OR is linked to an SDI.
- [x] I have followed CliMA's codebase [contribution](https://clima.github.io/ClimateMachine.jl/latest/Contributing/) and [style](https://clima.github.io/ClimateMachine.jl/latest/DevDocs/CodeStyle/) guidelines OR N/A.
- [x] I have followed CliMA's [documentation policy](https://github.com/CliMA/policies/wiki/Documentation-Policy).
- [x] I have checked all issues and PRs and I certify that this PR does not duplicate an open PR.
- [x] I linted my code on my local machine prior to submission OR N/A.
- [x] Unit tests are included OR N/A.
- [x] Code used in an integration test OR N/A.
- [x] All tests ran successfully on my local machine OR N/A.
- [x] All classes, modules, and function contain docstrings OR N/A.
- [x] Documentation has been added/updated OR N/A.


Co-authored-by: sriharshakandala <[email protected]>
  • Loading branch information
bors[bot] and sriharshakandala authored Jul 28, 2022
2 parents 1dca15e + 3bc59d5 commit 99a8f88
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 33 deletions.
30 changes: 29 additions & 1 deletion examples/hybrid/schur_complement_W.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ function linsolve!(::Type{Val{:init}}, f, u0; kwargs...)
S_column_array.d .= parent(S_column.coefs.:2)
@views S_column_array.du .=
parent(S_column.coefs.:3)[1:(end - 1)]
ldiv!(lu!(S_column_array), xᶠ𝕄_column_view)
thomas_algorithm!(S_column_array, xᶠ𝕄_column_view)

# Compute remaining components of x

Expand Down Expand Up @@ -319,3 +319,31 @@ function linsolve!(::Type{Val{:init}}, f, u0; kwargs...)
end
end
end
"""
thomas_algorithm!(A, b)
Thomas algorithm for solving a linear system A x = b,
where A is a tri-diagonal matrix.
A and b are overwritten.
Solution is written to b
"""
function thomas_algorithm!(A, b)
nrows = size(A, 1)
# first row
@inbounds A[1, 2] /= A[1, 1]
@inbounds b[1] /= A[1, 1]
# interior rows
for row in 2:(nrows - 1)
@inbounds fac = A[row, row] - (A[row, row - 1] * A[row - 1, row])
@inbounds A[row, row + 1] /= fac
@inbounds b[row] = (b[row] - A[row, row - 1] * b[row - 1]) / fac
end
# last row
@inbounds fac = A[nrows, nrows] - A[nrows - 1, nrows] * A[nrows, nrows - 1]
@inbounds b[nrows] = (b[nrows] - A[nrows, nrows - 1] * b[nrows - 1]) / fac
# back substitution
for row in (nrows - 1):-1:1
@inbounds b[row] -= b[row + 1] * A[row, row + 1]
end
return nothing
end
64 changes: 32 additions & 32 deletions regression_tests/mse_tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,48 +6,48 @@
all_best_mse = OrderedCollections.OrderedDict()
#
all_best_mse["sphere_held_suarez_rhotheta"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_held_suarez_rhotheta"][(:c, )] = 1.4872195411869964e-7
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :ρθ)] = 5.691492642213542e-9
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :uₕ, :components, :data, 1)] = 0.006441619348953592
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :uₕ, :components, :data, 2)] = 0.31157572259134936
all_best_mse["sphere_held_suarez_rhotheta"][(:f, :w, :components, :data, 1)] = 7.755543065279825
all_best_mse["sphere_held_suarez_rhotheta"][(:c, )] = 1.8443988202345924e-07
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :ρθ)] = 6.3245731672250676e-09
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :uₕ, :components, :data, 1)] = 7.3897763240045938e-03
all_best_mse["sphere_held_suarez_rhotheta"][(:c, :uₕ, :components, :data, 2)] = 3.2126928766337909e-01
all_best_mse["sphere_held_suarez_rhotheta"][(:f, :w, :components, :data, 1)] = 8.3186209553833539e+00
#
all_best_mse["sphere_held_suarez_rhoe_equilmoist"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, )] = 4.501805784676515e-9
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :ρe_tot)] = 1.0378174953305407e-6
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 0.001229646833346352
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 0.04509844529622248
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :ρq_tot)] = 3.513704923386486e-5
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:f, :w, :components, :data, 1)] = 36.33058108236622
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, )] = 4.2461223415004236e-09
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :ρe_tot)] = 8.4273281818351322e-07
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 1.3280117592054644e-03
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 4.3465607647255017e-02
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:c, :ρq_tot)] = 2.9656750499018924e-05
all_best_mse["sphere_held_suarez_rhoe_equilmoist"][(:f, :w, :components, :data, 1)] = 3.6027855811957018e+01
#
all_best_mse["sphere_baroclinic_wave_rhoe"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, )] = 7.056394074287637e-7
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :ρe_tot)] = 1.2328592627061116e-5
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :uₕ, :components, :data, 1)] = 0.00010440967343407822
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :uₕ, :components, :data, 2)] = 0.04562470960985503
all_best_mse["sphere_baroclinic_wave_rhoe"][(:f, :w, :components, :data, 1)] = 1.4189863615381206
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, )] = 5.5933566543671513e-07
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :ρe_tot)] = 9.5988308439307118e-06
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :uₕ, :components, :data, 1)] = 8.6099415698739893e-05
all_best_mse["sphere_baroclinic_wave_rhoe"][(:c, :uₕ, :components, :data, 2)] = 4.6322046929675134e-02
all_best_mse["sphere_baroclinic_wave_rhoe"][(:f, :w, :components, :data, 1)] = 1.3319686299101685e+00
#
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, )] = 2.1107977959107045e-8
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρe_tot)] = 9.873491232147725e-7
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 3.2179784938613176e-5
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 0.006691412083092768
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρq_tot)] = 7.141990719057408e-6
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:f, :w, :components, :data, 1)] = 4.207743738519047
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, )] = 2.5730268885919375e-08
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρe_tot)] = 1.3992122220669325e-06
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 1)] = 3.1160896522822743e-05
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :uₕ, :components, :data, 2)] = 7.2260271080074965e-03
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:c, :ρq_tot)] = 9.7433774527988526e-06
all_best_mse["sphere_baroclinic_wave_rhoe_equilmoist"][(:f, :w, :components, :data, 1)] = 4.1874289555362028e+00
#
all_best_mse["sphere_held_suarez_rhoe"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_held_suarez_rhoe"][(:c, )] = 4.919979310209457e-9
all_best_mse["sphere_held_suarez_rhoe"][(:c, :ρe_tot)] = 8.526725426842912e-8
all_best_mse["sphere_held_suarez_rhoe"][(:c, :uₕ, :components, :data, 1)] = 0.0005895892885837914
all_best_mse["sphere_held_suarez_rhoe"][(:c, :uₕ, :components, :data, 2)] = 0.014078405102092283
all_best_mse["sphere_held_suarez_rhoe"][(:f, :w, :components, :data, 1)] = 2.7221574293952524
all_best_mse["sphere_held_suarez_rhoe"][(:c, )] = 6.2778199507009592e-09
all_best_mse["sphere_held_suarez_rhoe"][(:c, :ρe_tot)] = 1.1217787432959677e-07
all_best_mse["sphere_held_suarez_rhoe"][(:c, :uₕ, :components, :data, 1)] = 6.9688206609128925e-04
all_best_mse["sphere_held_suarez_rhoe"][(:c, :uₕ, :components, :data, 2)] = 1.5636232022448279e-02
all_best_mse["sphere_held_suarez_rhoe"][(:f, :w, :components, :data, 1)] = 2.9236318551400702e+00
#
all_best_mse["sphere_held_suarez_rhoe_int"] = OrderedCollections.OrderedDict()
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, )] = 1.4180717938002013e-8
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :ρe_int)] = 1.922737444328935e-6
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :uₕ, :components, :data, 1)] = 0.000784261929391553
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :uₕ, :components, :data, 2)] = 0.012374839431819338
all_best_mse["sphere_held_suarez_rhoe_int"][(:f, :w, :components, :data, 1)] = 2.4220064683086586
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, )] = 1.6268100221432348e-08
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :ρe_int)] = 2.2951272392218787e-06
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :uₕ, :components, :data, 1)] = 9.1172256434432085e-04
all_best_mse["sphere_held_suarez_rhoe_int"][(:c, :uₕ, :components, :data, 2)] = 1.3212278564959526e-02
all_best_mse["sphere_held_suarez_rhoe_int"][(:f, :w, :components, :data, 1)] = 2.4745736750594602e+00
#
all_best_mse["edmf_bomex"] = OrderedCollections.OrderedDict()
all_best_mse["edmf_bomex"][(:c, )] = 0.0
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol
@testset "Unit tests" begin
include("test_domains.jl")
include("test_models.jl")
include("test_thomas.jl")
end
end
end
Expand Down
27 changes: 27 additions & 0 deletions test/test_thomas.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using Test
using LinearAlgebra
using ClimaAtmos
using NVTX
using Colors
include("../examples/hybrid/nvtx.jl")
include("../examples/hybrid/schur_complement_W.jl")


@testset "Thomas algorithm tests for Float32 and Float64" begin
FT = Float64
for FT in (Float32, Float64)
#! format: off
dl = FT.([0.0, 1825.3503, 645.0884, 329.23483, 201.36221, 136.82605, 99.648796, 76.03936, 59.650623, 0.0])
d = FT.([-1.0, -5813.132, -1572.7207, -718.2993, -409.75415, -262.26147, -180.3288, -130.9273, -99.37381, -77.86787, -1.0])
du = FT.([0.0, 924.0845, 386.9652, 207.68958, 125.552765, 80.648224, 53.839355, 37.18377, 26.495516, 0.0])
b = FT.([0.0, -3.813298, -19.938122, -51.053738, -99.55792, -201.72447, -450.88504, -944.41315, -1691.7865, -2629.067, 0.0])
#! format: off
A = Tridiagonal(dl, d, du)
Ac = deepcopy(A)
bc = deepcopy(b)
thomas_algorithm!(Ac, bc)
ldiv!(lu!(A), b)
relative_err = norm(b .- bc) / norm(b)
@test relative_err sqrt(eps(FT))
end
end

0 comments on commit 99a8f88

Please sign in to comment.