Skip to content

Commit a7858c9

Browse files
khodzhalu-zero
authored and committed
added dynnative feature
It selects the proper avx/sse3/native implementation at run time. The dynnative wrappers were added because rustc forbids using inline(always) and target_feature together.
1 parent 4c5bb85 commit a7858c9

File tree

6 files changed

+295
-4
lines changed

6 files changed

+295
-4
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ edition = "2018"
1515
sys = ["speexdsp-sys"]
1616
sse3 = [ "speexdsp-resampler/sse3" ]
1717
avx = [ "speexdsp-resampler/avx" ]
18+
dynnative = [ "speexdsp-resampler/dynnative" ]
1819

1920
[dev-dependencies]
2021
assert_approx_eq = "1.1.0"

resampler/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ edition = "2018"
1212

1313
[features]
1414
sse3 = []
15-
avx = []
15+
avx = ["sse3"]
16+
dynnative = ["avx"]
1617

1718
[dependencies]
1819
cfg-if = "0.1.10"

resampler/src/speex.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,9 @@ fn resampler_basic_zero(
833833
}
834834

835835
cfg_if! {
836-
if #[cfg(feature = "avx")] {
836+
if #[cfg(feature = "dynnative")] {
837+
use dynnative::*;
838+
} else if #[cfg(feature = "avx")] {
837839
use avx::*;
838840
} else if #[cfg(feature = "sse3")] {
839841
use sse3::*;
@@ -1286,7 +1288,8 @@ fn speex_resampler_magic<'a, 'b>(
12861288

12871289
#[cfg(feature = "avx")]
12881290
mod avx;
1289-
#[cfg(not(any(feature = "sse3", feature = "avx")))]
1291+
#[cfg(feature = "dynnative")]
1292+
mod dynnative;
12901293
mod native;
1291-
#[cfg(any(feature = "sse3", feature = "avx"))]
1294+
#[cfg(feature = "sse3")]
12921295
mod sse3;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/// AVX-enabled forwarding wrapper around
/// `crate::speex::avx::interpolate_step_single`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "avx")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports AVX (for example after
/// `is_x86_feature_detected!("avx")` has returned `true`).
#[allow(clippy::too_many_arguments)]
2+
#[target_feature(enable = "avx")]
3+
pub unsafe fn interpolate_step_single(
4+
in_slice: &[f32],
5+
out_slice: &mut [f32],
6+
out_stride: usize,
7+
out_sample: usize,
8+
oversample: usize,
9+
offset: usize,
10+
n: usize,
11+
sinc_table: &[f32],
12+
frac: f32,
13+
) {
14+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::avx::interpolate_step_single(
15+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
16+
sinc_table, frac,
17+
);
18+
}
19+
20+
/// AVX-enabled forwarding wrapper around
/// `crate::speex::avx::interpolate_step_double`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "avx")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports AVX (for example after
/// `is_x86_feature_detected!("avx")` has returned `true`).
#[allow(clippy::too_many_arguments)]
21+
#[target_feature(enable = "avx")]
22+
pub unsafe fn interpolate_step_double(
23+
in_slice: &[f32],
24+
out_slice: &mut [f32],
25+
out_stride: usize,
26+
out_sample: usize,
27+
oversample: usize,
28+
offset: usize,
29+
n: usize,
30+
sinc_table: &[f32],
31+
frac: f32,
32+
) {
33+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::avx::interpolate_step_double(
34+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
35+
sinc_table, frac,
36+
);
37+
}
38+
39+
/// AVX-enabled forwarding wrapper around
/// `crate::speex::avx::direct_step_single`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "avx")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports AVX (for example after
/// `is_x86_feature_detected!("avx")` has returned `true`).
#[target_feature(enable = "avx")]
40+
pub unsafe fn direct_step_single(
41+
in_slice: &[f32],
42+
out_slice: &mut [f32],
43+
out_stride: usize,
44+
out_sample: usize,
45+
n: usize,
46+
sinc_table: &[f32],
47+
) {
48+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::avx::direct_step_single(
49+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
50+
);
51+
}
52+
53+
/// AVX-enabled forwarding wrapper around
/// `crate::speex::avx::direct_step_double`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "avx")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports AVX (for example after
/// `is_x86_feature_detected!("avx")` has returned `true`).
#[target_feature(enable = "avx")]
54+
pub unsafe fn direct_step_double(
55+
in_slice: &[f32],
56+
out_slice: &mut [f32],
57+
out_stride: usize,
58+
out_sample: usize,
59+
n: usize,
60+
sinc_table: &[f32],
61+
) {
62+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::avx::direct_step_double(
63+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
64+
);
65+
}

resampler/src/speex/dynnative/mod.rs

+156
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/// Run-time dispatcher for `interpolate_step_single`.
///
/// On x86/x86_64 it probes the CPU with `is_x86_feature_detected!` and
/// forwards the unchanged arguments to
/// `avx_wrapper::interpolate_step_single` when AVX is reported, otherwise to
/// `sse3_wrapper::interpolate_step_single` when SSE3 is reported. If neither
/// is reported, or on any other target architecture (where the probing block
/// is compiled out), it calls `super::native::interpolate_step_single`.
#[allow(clippy::too_many_arguments)]
2+
#[inline(always)]
3+
pub fn interpolate_step_single(
4+
in_slice: &[f32],
5+
out_slice: &mut [f32],
6+
out_stride: usize,
7+
out_sample: usize,
8+
oversample: usize,
9+
offset: usize,
10+
n: usize,
11+
sinc_table: &[f32],
12+
frac: f32,
13+
) {
14+
// CPU feature probing only exists on x86-family targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
15+
{
16+
// AVX is tried first, SSE3 second.
if is_x86_feature_detected!("avx") {
17+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `avx` target feature; AVX support was confirmed just above.
unsafe {
18+
avx_wrapper::interpolate_step_single(
19+
in_slice, out_slice, out_stride, out_sample, oversample,
20+
offset, n, sinc_table, frac,
21+
);
22+
}
23+
// Work is done; do not fall through to the other implementations.
return;
24+
}
25+
26+
if is_x86_feature_detected!("sse3") {
27+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `sse3` target feature; SSE3 support was confirmed just above.
unsafe {
28+
sse3_wrapper::interpolate_step_single(
29+
in_slice, out_slice, out_stride, out_sample, oversample,
30+
offset, n, sinc_table, frac,
31+
);
32+
}
33+
return;
34+
}
35+
}
36+
37+
// Fallback: neither feature was detected, or this is not an x86 target.
super::native::interpolate_step_single(
38+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
39+
sinc_table, frac,
40+
);
41+
}
42+
43+
/// Run-time dispatcher for `interpolate_step_double`.
///
/// On x86/x86_64 it probes the CPU with `is_x86_feature_detected!` and
/// forwards the unchanged arguments to
/// `avx_wrapper::interpolate_step_double` when AVX is reported, otherwise to
/// `sse3_wrapper::interpolate_step_double` when SSE3 is reported. If neither
/// is reported, or on any other target architecture (where the probing block
/// is compiled out), it calls `super::native::interpolate_step_double`.
#[allow(clippy::too_many_arguments)]
44+
#[inline(always)]
45+
pub fn interpolate_step_double(
46+
in_slice: &[f32],
47+
out_slice: &mut [f32],
48+
out_stride: usize,
49+
out_sample: usize,
50+
oversample: usize,
51+
offset: usize,
52+
n: usize,
53+
sinc_table: &[f32],
54+
frac: f32,
55+
) {
56+
// CPU feature probing only exists on x86-family targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
57+
{
58+
// AVX is tried first, SSE3 second.
if is_x86_feature_detected!("avx") {
59+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `avx` target feature; AVX support was confirmed just above.
unsafe {
60+
avx_wrapper::interpolate_step_double(
61+
in_slice, out_slice, out_stride, out_sample, oversample,
62+
offset, n, sinc_table, frac,
63+
);
64+
}
65+
// Work is done; do not fall through to the other implementations.
return;
66+
}
67+
68+
if is_x86_feature_detected!("sse3") {
69+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `sse3` target feature; SSE3 support was confirmed just above.
unsafe {
70+
sse3_wrapper::interpolate_step_double(
71+
in_slice, out_slice, out_stride, out_sample, oversample,
72+
offset, n, sinc_table, frac,
73+
);
74+
}
75+
return;
76+
}
77+
}
78+
79+
// Fallback: neither feature was detected, or this is not an x86 target.
super::native::interpolate_step_double(
80+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
81+
sinc_table, frac,
82+
);
83+
}
84+
85+
/// Run-time dispatcher for `direct_step_single`.
///
/// On x86/x86_64 it probes the CPU with `is_x86_feature_detected!` and
/// forwards the unchanged arguments to `avx_wrapper::direct_step_single`
/// when AVX is reported, otherwise to `sse3_wrapper::direct_step_single`
/// when SSE3 is reported. If neither is reported, or on any other target
/// architecture (where the probing block is compiled out), it calls
/// `super::native::direct_step_single`.
#[inline(always)]
86+
pub fn direct_step_single(
87+
in_slice: &[f32],
88+
out_slice: &mut [f32],
89+
out_stride: usize,
90+
out_sample: usize,
91+
n: usize,
92+
sinc_table: &[f32],
93+
) {
94+
// CPU feature probing only exists on x86-family targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
95+
{
96+
// AVX is tried first, SSE3 second.
if is_x86_feature_detected!("avx") {
97+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `avx` target feature; AVX support was confirmed just above.
unsafe {
98+
avx_wrapper::direct_step_single(
99+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
100+
);
101+
}
102+
// Work is done; do not fall through to the other implementations.
return;
103+
}
104+
105+
if is_x86_feature_detected!("sse3") {
106+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `sse3` target feature; SSE3 support was confirmed just above.
unsafe {
107+
sse3_wrapper::direct_step_single(
108+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
109+
);
110+
}
111+
return;
112+
}
113+
}
114+
115+
// Fallback: neither feature was detected, or this is not an x86 target.
super::native::direct_step_single(
116+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
117+
);
118+
}
119+
120+
/// Run-time dispatcher for `direct_step_double`.
///
/// On x86/x86_64 it probes the CPU with `is_x86_feature_detected!` and
/// forwards the unchanged arguments to `avx_wrapper::direct_step_double`
/// when AVX is reported, otherwise to `sse3_wrapper::direct_step_double`
/// when SSE3 is reported. If neither is reported, or on any other target
/// architecture (where the probing block is compiled out), it calls
/// `super::native::direct_step_double`.
#[inline(always)]
121+
pub fn direct_step_double(
122+
in_slice: &[f32],
123+
out_slice: &mut [f32],
124+
out_stride: usize,
125+
out_sample: usize,
126+
n: usize,
127+
sinc_table: &[f32],
128+
) {
129+
// CPU feature probing only exists on x86-family targets.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
130+
{
131+
// AVX is tried first, SSE3 second.
if is_x86_feature_detected!("avx") {
132+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `avx` target feature; AVX support was confirmed just above.
unsafe {
133+
avx_wrapper::direct_step_double(
134+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
135+
);
136+
}
137+
// Work is done; do not fall through to the other implementations.
return;
138+
}
139+
140+
if is_x86_feature_detected!("sse3") {
141+
// SAFETY: the wrapper is `unsafe` because it is compiled with the
// `sse3` target feature; SSE3 support was confirmed just above.
unsafe {
142+
sse3_wrapper::direct_step_double(
143+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
144+
);
145+
}
146+
return;
147+
}
148+
}
149+
150+
// Fallback: neither feature was detected, or this is not an x86 target.
super::native::direct_step_double(
151+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
152+
);
153+
}
154+
155+
mod avx_wrapper;
156+
mod sse3_wrapper;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/// SSE3-enabled forwarding wrapper around
/// `crate::speex::sse3::interpolate_step_single`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "sse3")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports SSE3 (for example after
/// `is_x86_feature_detected!("sse3")` has returned `true`).
#[allow(clippy::too_many_arguments)]
2+
#[target_feature(enable = "sse3")]
3+
pub unsafe fn interpolate_step_single(
4+
in_slice: &[f32],
5+
out_slice: &mut [f32],
6+
out_stride: usize,
7+
out_sample: usize,
8+
oversample: usize,
9+
offset: usize,
10+
n: usize,
11+
sinc_table: &[f32],
12+
frac: f32,
13+
) {
14+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::sse3::interpolate_step_single(
15+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
16+
sinc_table, frac,
17+
);
18+
}
19+
20+
/// SSE3-enabled forwarding wrapper around
/// `crate::speex::sse3::interpolate_step_double`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "sse3")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports SSE3 (for example after
/// `is_x86_feature_detected!("sse3")` has returned `true`).
#[allow(clippy::too_many_arguments)]
21+
#[target_feature(enable = "sse3")]
22+
pub unsafe fn interpolate_step_double(
23+
in_slice: &[f32],
24+
out_slice: &mut [f32],
25+
out_stride: usize,
26+
out_sample: usize,
27+
oversample: usize,
28+
offset: usize,
29+
n: usize,
30+
sinc_table: &[f32],
31+
frac: f32,
32+
) {
33+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::sse3::interpolate_step_double(
34+
in_slice, out_slice, out_stride, out_sample, oversample, offset, n,
35+
sinc_table, frac,
36+
);
37+
}
38+
39+
/// SSE3-enabled forwarding wrapper around
/// `crate::speex::sse3::direct_step_single`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "sse3")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports SSE3 (for example after
/// `is_x86_feature_detected!("sse3")` has returned `true`).
#[target_feature(enable = "sse3")]
40+
pub unsafe fn direct_step_single(
41+
in_slice: &[f32],
42+
out_slice: &mut [f32],
43+
out_stride: usize,
44+
out_sample: usize,
45+
n: usize,
46+
sinc_table: &[f32],
47+
) {
48+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::sse3::direct_step_single(
49+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
50+
);
51+
}
52+
53+
/// SSE3-enabled forwarding wrapper around
/// `crate::speex::sse3::direct_step_double`.
///
/// The body only passes every argument through unchanged; the wrapper exists
/// so that the call is made from a function carrying
/// `#[target_feature(enable = "sse3")]`.
///
/// # Safety
///
/// Because of the `target_feature` attribute, this function must only be
/// called on a CPU that supports SSE3 (for example after
/// `is_x86_feature_detected!("sse3")` has returned `true`).
#[target_feature(enable = "sse3")]
54+
pub unsafe fn direct_step_double(
55+
in_slice: &[f32],
56+
out_slice: &mut [f32],
57+
out_stride: usize,
58+
out_sample: usize,
59+
n: usize,
60+
sinc_table: &[f32],
61+
) {
62+
// Pure pass-through: no argument is inspected or modified here.
crate::speex::sse3::direct_step_double(
63+
in_slice, out_slice, out_stride, out_sample, n, sinc_table,
64+
);
65+
}

0 commit comments

Comments
 (0)