From 5506f5fde7c7f3098c1007140af6eb8105c0f5ec Mon Sep 17 00:00:00 2001 From: Yuto Horikawa Date: Sun, 17 Jan 2021 14:04:29 +0900 Subject: [PATCH] Slightly more performance (#94) * update if statement, unicode symbols * remove meaningless assignment * remove redifinition of Base functions * fix type unstability, update symbols to unicode greek * update symbols to unicode greek * update symbols using Unicode greeks * move some bessel-roots related functions to besselroots.jl, and remove duplicates * remove unnecessary min function * add StaticArrays.jl for faster besselroots * add (a)inline for faster gausslegendre * add constants.jl to store all constants * fix compat table for StaticArrays.jl * bump version to v0.4.6 --- Project.toml | 4 +- src/FastGaussQuadrature.jl | 11 +- src/besselroots.jl | 213 +++++++++++++++++++++++-------------- src/constants.jl | 110 +++++++++++++++++++ src/gausshermite.jl | 103 +++++++++--------- src/gaussjacobi.jl | 14 +-- src/gausslaguerre.jl | 14 +-- src/gausslegendre.jl | 101 +----------------- test/test_besselroots.jl | 6 +- 9 files changed, 322 insertions(+), 254 deletions(-) create mode 100644 src/constants.jl diff --git a/Project.toml b/Project.toml index 5fe1776..1741923 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,15 @@ name = "FastGaussQuadrature" uuid = "442a2c76-b920-505d-bb47-c5924d526838" -version = "0.4.5" +version = "0.4.6" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] SpecialFunctions = "0.7, 0.8, 0.9, 0.10, 1.0" +StaticArrays = "1.0" julia = "1" [extras] diff --git a/src/FastGaussQuadrature.jl b/src/FastGaussQuadrature.jl index 6ef6188..f37094f 100644 --- a/src/FastGaussQuadrature.jl +++ b/src/FastGaussQuadrature.jl @@ -1,12 +1,8 @@ module FastGaussQuadrature -using SpecialFunctions, LinearAlgebra -cumprod(A::AbstractArray) = Base.cumprod(A, dims=1) -cumprod(A::AbstractArray, d::Int) = Base.cumprod(A, dims=d) -sum(A::AbstractArray, n::Int) = Base.sum(A, dims=n) -sum(A) = Base.sum(A) -flipdim(A, d) = reverse(A, dims=d) - +using LinearAlgebra +using SpecialFunctions +using StaticArrays export gausslegendre export gausschebyshev @@ -19,6 +15,7 @@ export besselroots import SpecialFunctions: besselj, airyai, airyaiprime +include("constants.jl") include("gausslegendre.jl") include("gausschebyshev.jl") include("gausslaguerre.jl") diff --git a/src/besselroots.jl b/src/besselroots.jl index a9d84eb..c4f68c5 100644 --- a/src/besselroots.jl +++ b/src/besselroots.jl @@ -42,22 +42,22 @@ function besselroots(ν::Float64, n::Integer) end x = zeros(n) - if n > 0 && ν == 0 + if ν == 0 for k in 1:min(n,20) x[k] = J0_roots[k] end for k in min(n,20)+1:n x[k] = McMahon(ν, k) end - elseif n > 0 && ν ≥ -1 && ν ≤ 5 - correctFirstFew = Piessens( ν ) + elseif -1 ≤ ν ≤ 5 + correctFirstFew = Piessens(ν) for k in 1:min(n,6) x[k] = correctFirstFew[k] end for k in min(n,6)+1:n x[k] = McMahon(ν, k) end - elseif ν > 5 + elseif 5 < ν for k in 1:n x[k] = McMahon(ν, k) end @@ -74,91 +74,148 @@ function McMahon(ν::Float64, k::Integer) # McMahon's expansion. This expansion gives very accurate approximation # for the sth zero (s ≥ 7) in the whole region ν ≥ -1, and moderate # approximation in other cases. - mu = 4ν^2 + μ = 4ν^2 a1 = 1 / 8 - a3 = (7mu-31) / 384 - a5 = 4*(3779+mu*(-982+83mu)) / 61440 # Evaluate via Horner's method. - a7 = 6*(-6277237+mu*(1585743+mu*(-153855+6949mu))) / 20643840 - a9 = 144*(2092163573+mu*(-512062548+mu*(48010494+mu*(-2479316+70197mu)))) / 11890851840 - a11 = 720 *(-8249725736393+mu*(1982611456181+mu*(-179289628602+mu*(8903961290 + - mu*(-287149133 + 5592657mu))))) / 10463949619200 - a13 = 576 *(423748443625564327 + mu*(-100847472093088506 + mu*(8929489333108377 + - mu*(-426353946885548+mu*(13172003634537+mu*(-291245357370 + 4148944183mu)))))) / 13059009124761600 + a3 = (7μ-31) / 384 + a5 = 4*(3779+μ*(-982+83μ)) / 61440 # Evaluate via Horner's method. + a7 = 6*(-6277237+μ*(1585743+μ*(-153855+6949μ))) / 20643840 + a9 = 144*(2092163573+μ*(-512062548+μ*(48010494+μ*(-2479316+70197μ)))) / 11890851840 + a11 = 720 *(-8249725736393+μ*(1982611456181+μ*(-179289628602+μ*(8903961290 + + μ*(-287149133 + 5592657μ))))) / 10463949619200 + a13 = 576 *(423748443625564327 + μ*(-100847472093088506 + μ*(8929489333108377 + + μ*(-426353946885548+μ*(13172003634537+μ*(-291245357370 + 4148944183μ)))))) / 13059009124761600 b = 0.25 * (2ν+4k-1)*π # Evaluate using Horner's scheme: - x = b - (mu-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b) + x = b - (μ-1)*( ((((((a13/b^2 + a11)/b^2 + a9)/b^2 + a7)/b^2 + a5)/b^2 + a3)/b^2 + a1)/b) return x end -# Roots of Bessel funcion ``J_0`` in Float64. -# https://mathworld.wolfram.com/BesselFunctionZeros.html -const J0_roots = - [ 2.4048255576957728 - 5.5200781102863106 - 8.6537279129110122 - 11.791534439014281 - 14.930917708487785 - 18.071063967910922 - 21.211636629879258 - 24.352471530749302 - 27.493479132040254 - 30.634606468431975 - 33.775820213573568 - 36.917098353664044 - 40.058425764628239 - 43.199791713176730 - 46.341188371661814 - 49.482609897397817 - 52.624051841114996 - 55.765510755019979 - 58.906983926080942 - 62.048469190227170 ] - - -const Piessens_C = [ - 2.883975316228 8.263194332307 11.493871452173 14.689036505931 17.866882871378 21.034784308088 - 0.767665211539 4.209200330779 4.317988625384 4.387437455306 4.435717974422 4.471319438161 - -0.086538804759 -0.164644722483 -0.130667664397 -0.109469595763 -0.094492317231 -0.083234240394 - 0.020433979038 0.039764618826 0.023009510531 0.015359574754 0.011070071951 0.008388073020 - -0.006103761347 -0.011799527177 -0.004987164201 -0.002655024938 -0.001598668225 -0.001042443435 - 0.002046841322 0.003893555229 0.001204453026 0.000511852711 0.000257620149 0.000144611721 - -0.000734476579 -0.001369989689 -0.000310786051 -0.000105522473 -0.000044416219 -0.000021469973 - 0.000275336751 0.000503054700 0.000083834770 0.000022761626 0.000008016197 0.000003337753 - -0.000106375704 -0.000190381770 -0.000023343325 -0.000005071979 -0.000001495224 -0.000000536428 - 0.000042003336 0.000073681222 0.000006655551 0.000001158094 0.000000285903 0.000000088402 - -0.000016858623 -0.000029010830 -0.000001932603 -0.000000269480 -0.000000055734 -0.000000014856 - 0.000006852440 0.000011579131 0.000000569367 0.000000063657 0.000000011033 0.000000002536 - -0.000002813300 -0.000004672877 -0.000000169722 -0.000000015222 -0.000000002212 -0.000000000438 - 0.000001164419 0.000001903082 0.000000051084 0.000000003677 0.000000000448 0.000000000077 - -0.000000485189 -0.000000781030 -0.000000015501 -0.000000000896 -0.000000000092 -0.000000000014 - 0.000000203309 0.000000322648 0.000000004736 0.000000000220 0.000000000019 0.000000000002 - -0.000000085602 -0.000000134047 -0.000000001456 -0.000000000054 -0.000000000004 0 - 0.000000036192 0.000000055969 0.000000000450 0.000000000013 0 0 - -0.000000015357 -0.000000023472 -0.000000000140 -0.000000000003 0 0 - 0.000000006537 0.000000009882 0.000000000043 0.000000000001 0 0 - -0.000000002791 -0.000000004175 -0.000000000014 0 0 0 - 0.000000001194 0.000000001770 0.000000000004 0 0 0 - -0.000000000512 -0.000000000752 0 0 0 0 - 0.000000000220 0.000000000321 0 0 0 0 - -0.000000000095 -0.000000000137 0 0 0 0 - 0.000000000041 0.000000000059 0 0 0 0 - -0.000000000018 -0.000000000025 0 0 0 0 - 0.000000000008 0.000000000011 0 0 0 0 - -0.000000000003 -0.000000000005 0 0 0 0 - 0.000000000001 0.000000000002 0 0 0 0] +function _piessens_chebyshev30(ν::Float64) + # Piessens's Chebyshev series approximations (1984). Calculates the 6 first + # zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5: + pt = (ν-2)/3 + T1 = 1.0 + T2 = pt + T3 = 2pt*T2 - T1 + T4 = 2pt*T3 - T2 + T5 = 2pt*T4 - T3 + T6 = 2pt*T5 - T4 + T7 = 2pt*T6 - T5 + T8 = 2pt*T7 - T6 + T9 = 2pt*T8 - T7 + T10 = 2pt*T9 - T8 + T11 = 2pt*T10 - T9 + T12 = 2pt*T11 - T10 + T13 = 2pt*T12 - T11 + T14 = 2pt*T13 - T12 + T15 = 2pt*T14 - T13 + T16 = 2pt*T15 - T14 + T17 = 2pt*T16 - T15 + T18 = 2pt*T17 - T16 + T19 = 2pt*T18 - T17 + T20 = 2pt*T19 - T18 + T21 = 2pt*T20 - T19 + T22 = 2pt*T21 - T20 + T23 = 2pt*T22 - T21 + T24 = 2pt*T23 - T22 + T25 = 2pt*T24 - T23 + T26 = 2pt*T25 - T24 + T27 = 2pt*T26 - T25 + T28 = 2pt*T27 - T26 + T29 = 2pt*T28 - T27 + T30 = 2pt*T29 - T28 + + T = SVector(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25,T26,T27,T28,T29,T30) + return T +end function Piessens(ν::Float64) # Piessens's Chebyshev series approximations (1984). Calculates the 6 first # zeros to at least 12 decimal figures in region -1 ≤ ν ≤ 5: C = Piessens_C - T = Array{Float64}(undef,size(C,1)) - pt = (ν-2)/3 - T[1], T[2] = 1., pt - for k = 2:size(C,1)-1 - T[k+1] = 2*pt*T[k] - T[k-1] - end + T = _piessens_chebyshev30(ν) y = C'*T - y[1] *= sqrt(ν+1) # Scale the first root. - return y + _y = collect(y) + _y[1] *= sqrt(ν+1) # Scale the first root. + return _y +end + + +function besselZeroRoots(m) + # BESSEL0ROOTS ROOTS OF BESSELJ(0,x). USE ASYMPTOTICS. + # Use McMahon's expansion for the remainder (NIST, 10.21.19): + jk = Array{Float64}(undef, m) + p = (1071187749376 / 315, 0.0, -401743168 / 105, 0.0, 120928 / 15, + 0.0, -124 / 3, 0.0, 1.0, 0.0) + # First 20 are precomputed: + @inbounds for jj = 1:min(m, 20) + jk[jj] = J0_roots[jj] + end + @inbounds for jj = 21:min(m, 47) + ak = π * (jj - .25) + ak82 = (.125 / ak)^2 + jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5], p[3]) + end + @inbounds for jj = 48:min(m, 344) + ak = π * (jj - .25) + ak82 = (.125 / ak)^2 + jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5]) + end + @inbounds for jj = 345:min(m,13191) + ak = π * (jj - .25) + ak82 = (.125 / ak)^2 + jk[jj] = ak + .125 / ak * muladd(ak82, p[7], 1.0) + end + @inbounds for jj = 13192:m + ak = π * (jj - .25) + jk[jj] = ak + .125 / ak + end + return jk +end + +function besselJ1(m) + # BESSELJ1 EVALUATE BESSELJ(1,x)^2 AT ROOTS OF BESSELJ(0,x). + # USE ASYMPTOTICS. Use Taylor series of (NIST, 10.17.3) and McMahon's + # expansion (NIST, 10.21.19): + Jk2 = Array{Float64}(undef, m) + c = (-171497088497 / 15206400, 461797 / 1152, -172913 / 8064, + 151 / 80, -7 / 24, 0.0, 2.0) + # First 10 are precomputed: + @inbounds for jj = 1:min(m, 10) + Jk2[jj] = besselJ1_10[jj] + end + @inbounds for jj = 11:min(m, 15) + ak = π * (jj - .25) + ak2 = (1 / ak)^2 + Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], + c[2], c[1]), ak2^2, c[7]) + end + @inbounds for jj = 16:min(m, 21) + ak = π * (jj - .25) + ak2 = (1 / ak)^2 + Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], c[2]), + ak2^2, c[7]) + end + @inbounds for jj = 22:min(m,55) + ak = π * (jj - .25) + ak2 = (1 / ak)^2 + Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3]), + ak2^2, c[7]) + end + @inbounds for jj = 56:min(m,279) + ak = π * (jj - .25) + ak2 = (1 / ak)^2 + Jk2[jj] = 1 / (π * ak) * muladd(muladd(ak2, c[4], c[5]), ak2^2, c[7]) + end + @inbounds for jj = 280:min(m,2279) + ak = π * (jj - .25) + ak2 = (1 / ak)^2 + Jk2[jj] = 1 / (π * ak) * muladd(ak2^2, c[5], c[7]) + end + @inbounds for jj = 2280:m + ak = π * (jj - .25) + Jk2[jj] = 1 / (π * ak) * c[7] + end + return Jk2 end diff --git a/src/constants.jl b/src/constants.jl new file mode 100644 index 0000000..bf1d014 --- /dev/null +++ b/src/constants.jl @@ -0,0 +1,110 @@ +@doc raw""" +First twenty roots of Bessel funcion ``J_0`` in Float64. +https://mathworld.wolfram.com/BesselFunctionZeros.html +""" +const J0_roots = @SVector [ + 2.4048255576957728, + 5.5200781102863106, + 8.6537279129110122, + 11.791534439014281, + 14.930917708487785, + 18.071063967910922, + 21.211636629879258, + 24.352471530749302, + 27.493479132040254, + 30.634606468431975, + 33.775820213573568, + 36.917098353664044, + 40.058425764628239, + 43.199791713176730, + 46.341188371661814, + 49.482609897397817, + 52.624051841114996, + 55.765510755019979, + 58.906983926080942, + 62.048469190227170, +] + + +@doc raw""" +Coefficients of Chebyshev series approximations for the zeros of the Bessel functions +```math +j_{\nu, s} \approx \sum_{k}^{n} C_{k,s} T_k(\frac{\nu-2}{3}) +``` +where $j_{\nu, s}$ is a ``s``-th zero of Bessel function ``J_\nu``, ``\{T_k\}`` are Chebyshev polynomials and ``C_{k, s}`` is the coefficients. +""" +const Piessens_C = @SMatrix [ + 2.883975316228 8.263194332307 11.493871452173 14.689036505931 17.866882871378 21.034784308088 + 0.767665211539 4.209200330779 4.317988625384 4.387437455306 4.435717974422 4.471319438161 + -0.086538804759 -0.164644722483 -0.130667664397 -0.109469595763 -0.094492317231 -0.083234240394 + 0.020433979038 0.039764618826 0.023009510531 0.015359574754 0.011070071951 0.008388073020 + -0.006103761347 -0.011799527177 -0.004987164201 -0.002655024938 -0.001598668225 -0.001042443435 + 0.002046841322 0.003893555229 0.001204453026 0.000511852711 0.000257620149 0.000144611721 + -0.000734476579 -0.001369989689 -0.000310786051 -0.000105522473 -0.000044416219 -0.000021469973 + 0.000275336751 0.000503054700 0.000083834770 0.000022761626 0.000008016197 0.000003337753 + -0.000106375704 -0.000190381770 -0.000023343325 -0.000005071979 -0.000001495224 -0.000000536428 + 0.000042003336 0.000073681222 0.000006655551 0.000001158094 0.000000285903 0.000000088402 + -0.000016858623 -0.000029010830 -0.000001932603 -0.000000269480 -0.000000055734 -0.000000014856 + 0.000006852440 0.000011579131 0.000000569367 0.000000063657 0.000000011033 0.000000002536 + -0.000002813300 -0.000004672877 -0.000000169722 -0.000000015222 -0.000000002212 -0.000000000438 + 0.000001164419 0.000001903082 0.000000051084 0.000000003677 0.000000000448 0.000000000077 + -0.000000485189 -0.000000781030 -0.000000015501 -0.000000000896 -0.000000000092 -0.000000000014 + 0.000000203309 0.000000322648 0.000000004736 0.000000000220 0.000000000019 0.000000000002 + -0.000000085602 -0.000000134047 -0.000000001456 -0.000000000054 -0.000000000004 0 + 0.000000036192 0.000000055969 0.000000000450 0.000000000013 0 0 + -0.000000015357 -0.000000023472 -0.000000000140 -0.000000000003 0 0 + 0.000000006537 0.000000009882 0.000000000043 0.000000000001 0 0 + -0.000000002791 -0.000000004175 -0.000000000014 0 0 0 + 0.000000001194 0.000000001770 0.000000000004 0 0 0 + -0.000000000512 -0.000000000752 0 0 0 0 + 0.000000000220 0.000000000321 0 0 0 0 + -0.000000000095 -0.000000000137 0 0 0 0 + 0.000000000041 0.000000000059 0 0 0 0 + -0.000000000018 -0.000000000025 0 0 0 0 + 0.000000000008 0.000000000011 0 0 0 0 + -0.000000000003 -0.000000000005 0 0 0 0 + 0.000000000001 0.000000000002 0 0 0 0 +] + + +@doc raw""" +Values of Bessel function ``J_1`` on first ten roots of Bessel function `J_0`. + +# Examples +```jldoctest; setup = :(using FastGaussQuadrature, SpecialFunctions) +julia> roots = besselroots(0,10); + +julia> (besselj1.(roots)).^2 ≈ FastGaussQuadrature.besselJ1_10 +true +``` +""" +const besselJ1_10 = @SVector [ + 0.2695141239419169, + 0.1157801385822037, + 0.07368635113640822, + 0.05403757319811628, + 0.04266142901724309, + 0.03524210349099610, + 0.03002107010305467, + 0.02614739149530809, + 0.02315912182469139, + 0.02078382912226786, +] + + +@doc raw""" +The first 11 roots of the Airy function in Float64 precision +""" +const airy_roots = @SVector [ + -2.338107410459767, + -4.08794944413097, + -5.520559828095551, + -6.786708090071759, + -7.944133587120853, + -9.02265085340981, + -10.04017434155809, + -11.00852430373326, + -11.93601556323626, + -12.828776752865757, + -13.69148903521072, +] diff --git a/src/gausshermite.jl b/src/gausshermite.jl index 00d6fdc..8f60b5d 100644 --- a/src/gausshermite.jl +++ b/src/gausshermite.jl @@ -44,16 +44,16 @@ function unweightedgausshermite(n::Integer) end # fold out - if mod(n,2) == 1 - w = [flipdim(x[2][:],1); x[2][2:end]] - x = [-flipdim(x[1],1) ; x[1][2:end]] + if isodd(n) + _w = [reverse(x[2][:]); x[2][2:end]] + _x = [-reverse(x[1]) ; x[1][2:end]] else - w = [flipdim(x[2][:],1); x[2][:]] - x = [-flipdim(x[1],1) ; x[1]] + _w = [reverse(x[2][:]); x[2][:]] + _x = [-reverse(x[1]) ; x[1]] end - w .*= sqrt(π)/sum(exp.(-x.^2).*w) + _w .*= sqrt(π)/sum(exp.(-_x.^2).*_w) - return x, w + return _x, _w end function hermpts_asy(n::Integer) @@ -61,17 +61,17 @@ function hermpts_asy(n::Integer) x0 = HermiteInitialGuesses(n) # get initial guesses t0 = x0./sqrt(2n+1) - theta0 = acos.(t0) # convert to theta-variable + θ = acos.(t0) # convert to θ-variable val = x0; - for k = 1:20 - val = hermpoly_asy_airy(n, theta0); - dt = -val[1]./(sqrt(2).*sqrt(2n+1).*val[2].*sin.(theta0)) - theta0 .-= dt # Newton update - if norm(dt,Inf) < sqrt(eps(Float64))/10 + for _ in 1:20 + val = hermpoly_asy_airy(n, θ); + dθ = -val[1]./(sqrt(2).*sqrt(2n+1).*val[2].*sin.(θ)) + θ .-= dθ # Newton update + if norm(dθ,Inf) < sqrt(eps(Float64))/10 break end end - t0 = cos.(theta0) + t0 = cos.(θ) x = sqrt(2n+1)*t0 #back to x-variable w = x.*val[1] .+ sqrt(2).*val[2] w .= 1 ./ w.^2 # quadrature weights @@ -85,12 +85,12 @@ function hermpts_rec(n::Integer) x0 = HermiteInitialGuesses(n) x0 .*= sqrt(2) val = x0 - for _ = 1:10 + for _ in 1:10 val = hermpoly_rec.(n, x0) dx = first.(val)./last.(val) dx[ isnan.( dx ) ] .= 0 x0 .= x0 .- dx - if norm(dx, Inf)>1 bess = (1:m)*π a = .5 @@ -262,7 +265,7 @@ let T(t) = @. t^(2/3)*(1+5/48*t^(-2)-5/36*t^(-4)+(77125/82944)*t^(-6) -108056875 bess = ((0:m-1) .+ 0.5)*π a = -.5 end - nu = 4*m + 2*a + 2 + ν = 4m + 2a + 2 airyrts = -T(3/8*π*(4*(1:m) .- 1)) @@ -280,18 +283,18 @@ let T(t) = @. t^(2/3)*(1+5/48*t^(-2)-5/36*t^(-4)+(77125/82944)*t^(-6) -108056875 -12.828776752865757] airyrts[1:10] = airyrts_exact # correct first 10. - x_init = sqrt.(abs.(nu .+ (2^(2/3)).*airyrts.*nu^(1/3) .+ (1/5*2^(4/3)).*airyrts.^2 .* nu^(-1/3) .+ - (11/35-a^2-12/175).*airyrts.^3 ./ nu .+ ((16/1575).*airyrts.+(92/7875).*airyrts.^4).*2^(2/3).*nu^(-5/3) .- - ((15152/3031875).*airyrts.^5 .+ (1088/121275).*airyrts.^2).*2^(1/3).*nu^(-7/3))) - x_init_airy = flipdim(x_init,1) + x_init = sqrt.(abs.(ν .+ (2^(2/3)).*airyrts.*ν^(1/3) .+ (1/5*2^(4/3)).*airyrts.^2 .* ν^(-1/3) .+ + (11/35-a^2-12/175).*airyrts.^3 ./ ν .+ ((16/1575).*airyrts.+(92/7875).*airyrts.^4).*2^(2/3).*ν^(-5/3) .- + ((15152/3031875).*airyrts.^5 .+ (1088/121275).*airyrts.^2).*2^(1/3).*ν^(-7/3))) + x_init_airy = reverse(x_init) # Tricomi initial guesses. Equation (2.1) in [1]. Originally in [2]. # These initial guesses are good near x = 0 . Note: zeros of besselj(+/-.5,x) # are integer and half-integer multiples of π. - # x_init_bess = bess/sqrt(nu).*sqrt((1+ (bess.^2+2*(a^2-1))/3/nu^2) ); + # x_init_bess = bess/sqrt(ν).*sqrt((1+ (bess.^2+2*(a^2-1))/3/ν^2) ); Tnk0 = fill(π/2,m) - nu = (4*m+2*a+2) - rhs = ((4*m+3) .- 4*(1:m))/nu*π + ν = (4*m+2*a+2) + rhs = ((4*m+3) .- 4*(1:m))/ν*π for k = 1:7 val = Tnk0 .- sin.(Tnk0) .- rhs @@ -301,14 +304,14 @@ let T(t) = @. t^(2/3)*(1+5/48*t^(-2)-5/36*t^(-4)+(77125/82944)*t^(-6) -108056875 end tnk = cos.(Tnk0./2).^2 - x_init_sin = @. sqrt(nu*tnk - (5 / (4 * (1-tnk)^2) - 1 / (1 - tnk)-1 + 3*a^2)/3 / nu) + x_init_sin = @. sqrt(ν*tnk - (5 / (4 * (1-tnk)^2) - 1 / (1 - tnk)-1 + 3*a^2)/3 / ν) # Patch together p = 0.4985+eps(Float64) x_init = [x_init_sin[1:convert(Int,floor(p*n))] ; x_init_airy[convert(Int,ceil(p*n)):end]] - if mod(n, 2) == 1 + if isodd(n) x_init = [0 ; x_init] x_init = x_init[1:m+1] else diff --git a/src/gaussjacobi.jl b/src/gaussjacobi.jl index 36cfa33..3fb6e85 100644 --- a/src/gaussjacobi.jl +++ b/src/gaussjacobi.jl @@ -275,32 +275,32 @@ function feval_asy1(n::Integer, α::Float64, β::Float64, t::AbstractVector, idx cosA2 = cosA.*cosT .+ sinA.*sinT sinA2 = sinA.*cosT .- cosA.*sinT - sinT = hcat(ones(N), cumprod(repeat((csc.(t/2)/2),1,M-1),2)) # M × N matrix + sinT = hcat(ones(N), cumprod(repeat((csc.(t/2)/2),1,M-1), dims=2)) # M × N matrix secT = sec.(t/2)/2 j = 0:M-2 _vec = @. (0.5+α+j)*(0.5-α+j)/(j+1)/(2n+α+β+j+2) - P1 = [1;cumprod(_vec,1)] + P1 = [1;cumprod(_vec)] P1[3:4:end] = -P1[3:4:end] P1[4:4:end] = -P1[4:4:end] P2 = Matrix(1.0I, M, M) for l in 0:M-1 j = 0:M-l-2 _vec = @. (0.5+β+j)*(0.5-β+j)/(j+1)/(2n+α+β+j+l+2) - P2[l+1,(l+1).+(1:length(j))] = cumprod(_vec,1) + P2[l+1,(l+1).+(1:length(j))] = cumprod(_vec) end PHI = repeat(P1,1,M).*P2 j = 0:M-2 _vec = @. (0.5+α+j)*(0.5-α+j)/(j+1)/(2*(n-1)+α+β+j+2) - P1 = [1;cumprod(_vec,1)] + P1 = [1;cumprod(_vec)] P1[3:4:end] = -P1[3:4:end] P1[4:4:end] = -P1[4:4:end] P2 = Matrix(1.0I, M, M) for l in 0:M-1 j = 0:M-l-2 _vec = @. (0.5+β+j)*(0.5-β+j)/(j+1)/(2*(n-1)+α+β+j+l+2) - P2[l+1,(l+1).+(1:length(j))] = cumprod(_vec,1) + P2[l+1,(l+1).+(1:length(j))] = cumprod(_vec) end PHI2 = repeat(P1,1,M).*P2 @@ -369,12 +369,12 @@ function feval_asy1(n::Integer, α::Float64, β::Float64, t::AbstractVector, idx return vals, ders end -function boundary(n::Integer, α::Float64, β::Float64, npts) +function boundary(n::Integer, α::Float64, β::Float64, npts::Integer) # Algorithm for computing nodes and weights near the boundary. # Use Newton iterations to find the first few Bessel roots: smallK = min(30, npts) - jk = besselroots(α, min(npts, smallK)) + jk = besselroots(α, smallK) # Approximate roots via asymptotic formula: (see Olver 1974) phik = jk/(n + .5*(α + β + 1)) diff --git a/src/gausslaguerre.jl b/src/gausslaguerre.jl index 7f9a365..09e32d1 100644 --- a/src/gausslaguerre.jl +++ b/src/gausslaguerre.jl @@ -85,7 +85,7 @@ function gausslaguerre(n::Integer, α::Real; reduced = false) gausslaguerre_rec(n, α) else # Use explicit asymptotic expansions for larger n - # The restriction to α comes from the restriction on nu in besselroots + # The restriction to α comes from the restriction on ν in besselroots if α < 5 gausslaguerre_asy(n, α, reduced=reduced, T=-1, recompute=true) else @@ -98,12 +98,6 @@ end underflow_threshold(x) = underflow_threshold(typeof(x)) underflow_threshold(::Type{T}) where {T <: AbstractFloat} = 10floatmin(T) -# We explicitly store the first 11 roots of the Airy function in Float64 precision -const airy_roots = [-2.338107410459767, -4.08794944413097, -5.520559828095551, - -6.786708090071759, -7.944133587120853, -9.02265085340981, -10.04017434155809, - -11.00852430373326, -11.93601556323626, -12.828776752865757, -13.69148903521072] - - """ Compute the Gauss-Laguerre rule using explicit asymptotic expansions for the nodes and weights. @@ -144,7 +138,7 @@ function gausslaguerre_asy(n::Integer, α; k += 1 # We iterate until the estimated error of the bulk expansion is smaller # than the one of the Bessel expansion - jak = (k < k_bessel) ? jak_vector[k] : jak = McMahon(α, k) + jak = (k < k_bessel) ? jak_vector[k] : McMahon(α, k) xk, wk, δ_bessel = gausslaguerre_asy_bessel(n, α, jak, d, T) xkb, wkb, δ_bulk = gausslaguerre_asy_bulk(n, α, k, d, T) @@ -647,8 +641,8 @@ function gausslaguerre_rec(n, α; reduced = false) # We compute up to 7 starting values for the Newton iterations n_pre = min(n, 7) - nu = 4n + 2α + 2 - x_pre = T.(besselroots(α, n_pre)).^2 / nu # this is a lower bound by [DLMF 18.16.10] + ν = 4n + 2α + 2 + x_pre = T.(besselroots(α, n_pre)).^2 / ν # this is a lower bound by [DLMF 18.16.10] noUnderflow = true # this flag turns false once the weights start to underflow for k in 1:n diff --git a/src/gausslegendre.jl b/src/gausslegendre.jl index 8082c2f..0fd1d05 100644 --- a/src/gausslegendre.jl +++ b/src/gausslegendre.jl @@ -19,7 +19,7 @@ julia> I ≈ 2/5 true ``` """ -function gausslegendre(n::Integer) +@inline function gausslegendre(n::Integer) # GAUSSLEGENDRE(n) COMPUTE THE GAUSS-LEGENDRE NODES AND WEIGHTS IN O(n) time. if n < 0 @@ -72,7 +72,7 @@ function asy(n) @inbounds for i in 1:m x[i] = -x[i] end - @inbounds mod(n, 2) == 1 && (x[m] = 0.0) + @inbounds isodd(n) && (x[m] = 0.0) return x, w end @@ -230,7 +230,7 @@ function rec(n) PP2[i] = 2 / ((1 - x[i]^2) * PP2[i]^2) end - return x,PP2 + return x, PP2 end function innerRec(n, x) @@ -254,98 +254,3 @@ function innerRec(n, x) end return myPm1, myPPm1 end - -const besselZeros_20 = [2.4048255576957728, 5.5200781102863106, - 8.6537279129110122, 11.791534439014281, - 14.930917708487785, 18.071063967910922, - 21.211636629879258, 24.352471530749302, - 27.493479132040254, 30.634606468431975, - 33.775820213573568, 36.917098353664044, - 40.058425764628239, 43.199791713176730, - 46.341188371661814, 49.482609897397817, - 52.624051841114996, 55.765510755019979, - 58.906983926080942, 62.048469190227170] - -function besselZeroRoots(m) - # BESSEL0ROOTS ROOTS OF BESSELJ(0,x). USE ASYMPTOTICS. - # Use McMahon's expansion for the remainder (NIST, 10.21.19): - jk = Array{Float64}(undef, m) - p = (1071187749376 / 315, 0.0, -401743168 / 105, 0.0, 120928 / 15, - 0.0, -124 / 3, 0.0, 1.0, 0.0) - # First 20 are precomputed: - @inbounds for jj = 1:min(m, 20) - jk[jj] = besselZeros_20[jj] - end - @inbounds for jj = 21:min(m, 47) - ak = π * (jj - .25) - ak82 = (.125 / ak)^2 - jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5], p[3]) - end - @inbounds for jj = 48:min(m, 344) - ak = π * (jj - .25) - ak82 = (.125 / ak)^2 - jk[jj] = ak + .125 / ak * @evalpoly(ak82, 1.0, p[7], p[5]) - end - @inbounds for jj = 345:min(m,13191) - ak = π * (jj - .25) - ak82 = (.125 / ak)^2 - jk[jj] = ak + .125 / ak * muladd(ak82, p[7], 1.0) - end - @inbounds for jj = 13192:m - ak = π * (jj - .25) - jk[jj] = ak + .125 / ak - end - return jk -end - -const besselJ1_10 = [0.2695141239419169, 0.1157801385822037, - 0.07368635113640822, 0.05403757319811628, - 0.04266142901724309, 0.03524210349099610, - 0.03002107010305467, 0.02614739149530809, - 0.02315912182469139, 0.02078382912226786] - -function besselJ1(m) - # BESSELJ1 EVALUATE BESSELJ(1,x)^2 AT ROOTS OF BESSELJ(0,x). - # USE ASYMPTOTICS. Use Taylor series of (NIST, 10.17.3) and McMahon's - # expansion (NIST, 10.21.19): - Jk2 = Array{Float64}(undef, m) - c = (-171497088497 / 15206400, 461797 / 1152, -172913 / 8064, - 151 / 80, -7 / 24, 0.0, 2.0) - # First 10 are precomputed: - @inbounds for jj = 1:min(m, 10) - Jk2[jj] = besselJ1_10[jj] - end - @inbounds for jj = 11:min(m, 15) - ak = π * (jj - .25) - ak2 = (1 / ak)^2 - Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], - c[2], c[1]), ak2^2, c[7]) - end - @inbounds for jj = 16:min(m, 21) - ak = π * (jj - .25) - ak2 = (1 / ak)^2 - Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3], c[2]), - ak2^2, c[7]) - end - @inbounds for jj = 22:min(m,55) - ak = π * (jj - .25) - ak2 = (1 / ak)^2 - Jk2[jj] = 1 / (π * ak) * muladd(@evalpoly(ak2, c[5], c[4], c[3]), - ak2^2, c[7]) - end - @inbounds for jj = 56:min(m,279) - ak = π * (jj - .25) - ak2 = (1 / ak)^2 - Jk2[jj] = 1 / (π * ak) * muladd(muladd(ak2, c[4], c[5]), ak2^2, c[7]) - end - @inbounds for jj = 280:min(m,2279) - ak = π * (jj - .25) - ak2 = (1 / ak)^2 - Jk2[jj] = 1 / (π * ak) * muladd(ak2^2, c[5], c[7]) - end - @inbounds for jj = 2280:m - ak = π * (jj - .25) - Jk2[jj] = 1 / (π * ak) * c[7] - end - return Jk2 -end diff --git a/test/test_besselroots.jl b/test/test_besselroots.jl index 97eb278..1251c96 100644 --- a/test/test_besselroots.jl +++ b/test/test_besselroots.jl @@ -6,10 +6,10 @@ import SpecialFunctions: besselj tol = 1e-11 - # Check if besselj(nu, besselroots(nu, n) ) is small: + # Check if besselj(ν, besselroots(ν, n) ) is small: - for nu = 0.:0.1:5. + for ν = 0.:0.1:5. n = 10 - @test norm( besselj.(nu, besselroots(nu, n) ), Inf ) < tol + @test norm( besselj.(ν, besselroots(ν, n) ), Inf ) < tol end end