use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;
12
13/// Computes the absolute values of packed 32-bit integers in `a`.
14///
15/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
16#[inline]
17#[target_feature(enable = "avx512f")]
18#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19#[cfg_attr(test, assert_instr(vpabsd))]
20#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
22 unsafe {
23 let a: Simd = a.as_i32x16();
24 let r: Simd = simd_select::<i32x16, _>(mask:simd_lt(a, i32x16::ZERO), if_true:simd_neg(a), if_false:a);
25 transmute(src:r)
26 }
27}
28
29/// Computes the absolute value of packed 32-bit integers in `a`, and store the
30/// unsigned results in `dst` using writemask `k` (elements are copied from
31/// `src` when the corresponding mask bit is not set).
32///
33/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
34#[inline]
35#[target_feature(enable = "avx512f")]
36#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37#[cfg_attr(test, assert_instr(vpabsd))]
38#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
40 unsafe {
41 let abs: Simd = _mm512_abs_epi32(a).as_i32x16();
42 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x16()))
43 }
44}
45
46/// Computes the absolute value of packed 32-bit integers in `a`, and store the
47/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
48/// the corresponding mask bit is not set).
49///
50/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
51#[inline]
52#[target_feature(enable = "avx512f")]
53#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
54#[cfg_attr(test, assert_instr(vpabsd))]
55#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
56pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
57 unsafe {
58 let abs: Simd = _mm512_abs_epi32(a).as_i32x16();
59 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x16::ZERO))
60 }
61}
62
63/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
64///
65/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
66#[inline]
67#[target_feature(enable = "avx512f,avx512vl")]
68#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
69#[cfg_attr(test, assert_instr(vpabsd))]
70#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
71pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
72 unsafe {
73 let abs: Simd = _mm256_abs_epi32(a).as_i32x8();
74 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x8()))
75 }
76}
77
78/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
79///
80/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
81#[inline]
82#[target_feature(enable = "avx512f,avx512vl")]
83#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
84#[cfg_attr(test, assert_instr(vpabsd))]
85#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
86pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
87 unsafe {
88 let abs: Simd = _mm256_abs_epi32(a).as_i32x8();
89 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x8::ZERO))
90 }
91}
92
93/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
94///
95/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
96#[inline]
97#[target_feature(enable = "avx512f,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpabsd))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
102 unsafe {
103 let abs: Simd = _mm_abs_epi32(a).as_i32x4();
104 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i32x4()))
105 }
106}
107
108/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
111#[inline]
112#[target_feature(enable = "avx512f,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpabsd))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
117 unsafe {
118 let abs: Simd = _mm_abs_epi32(a).as_i32x4();
119 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i32x4::ZERO))
120 }
121}
122
123/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
124///
125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
126#[inline]
127#[target_feature(enable = "avx512f")]
128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
129#[cfg_attr(test, assert_instr(vpabsq))]
130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
131pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
132 unsafe {
133 let a: Simd = a.as_i64x8();
134 let r: Simd = simd_select::<i64x8, _>(mask:simd_lt(a, i64x8::ZERO), if_true:simd_neg(a), if_false:a);
135 transmute(src:r)
136 }
137}
138
139/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
140///
141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
142#[inline]
143#[target_feature(enable = "avx512f")]
144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
145#[cfg_attr(test, assert_instr(vpabsq))]
146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
147pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
148 unsafe {
149 let abs: Simd = _mm512_abs_epi64(a).as_i64x8();
150 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x8()))
151 }
152}
153
154/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
157#[inline]
158#[target_feature(enable = "avx512f")]
159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
160#[cfg_attr(test, assert_instr(vpabsq))]
161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
162pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
163 unsafe {
164 let abs: Simd = _mm512_abs_epi64(a).as_i64x8();
165 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x8::ZERO))
166 }
167}
168
169/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
170///
171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
172#[inline]
173#[target_feature(enable = "avx512f,avx512vl")]
174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
175#[cfg_attr(test, assert_instr(vpabsq))]
176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
177pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
178 unsafe {
179 let a: Simd = a.as_i64x4();
180 let r: Simd = simd_select::<i64x4, _>(mask:simd_lt(a, i64x4::ZERO), if_true:simd_neg(a), if_false:a);
181 transmute(src:r)
182 }
183}
184
185/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
188#[inline]
189#[target_feature(enable = "avx512f,avx512vl")]
190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
191#[cfg_attr(test, assert_instr(vpabsq))]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
194 unsafe {
195 let abs: Simd = _mm256_abs_epi64(a).as_i64x4();
196 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x4()))
197 }
198}
199
200/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
201///
202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
203#[inline]
204#[target_feature(enable = "avx512f,avx512vl")]
205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
206#[cfg_attr(test, assert_instr(vpabsq))]
207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
208pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
209 unsafe {
210 let abs: Simd = _mm256_abs_epi64(a).as_i64x4();
211 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x4::ZERO))
212 }
213}
214
215/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
216///
217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
218#[inline]
219#[target_feature(enable = "avx512f,avx512vl")]
220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
221#[cfg_attr(test, assert_instr(vpabsq))]
222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
223pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
224 unsafe {
225 let a: Simd = a.as_i64x2();
226 let r: Simd = simd_select::<i64x2, _>(mask:simd_lt(a, i64x2::ZERO), if_true:simd_neg(a), if_false:a);
227 transmute(src:r)
228 }
229}
230
231/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
232///
233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
234#[inline]
235#[target_feature(enable = "avx512f,avx512vl")]
236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
237#[cfg_attr(test, assert_instr(vpabsq))]
238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
239pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
240 unsafe {
241 let abs: Simd = _mm_abs_epi64(a).as_i64x2();
242 transmute(src:simd_select_bitmask(m:k, yes:abs, no:src.as_i64x2()))
243 }
244}
245
246/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
247///
248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
249#[inline]
250#[target_feature(enable = "avx512f,avx512vl")]
251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
252#[cfg_attr(test, assert_instr(vpabsq))]
253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
254pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
255 unsafe {
256 let abs: Simd = _mm_abs_epi64(a).as_i64x2();
257 transmute(src:simd_select_bitmask(m:k, yes:abs, no:i64x2::ZERO))
258 }
259}
260
261/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
264#[inline]
265#[target_feature(enable = "avx512f")]
266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
267#[cfg_attr(test, assert_instr(vpandd))]
268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
269pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
270 unsafe { simd_fabs(v2) }
271}
272
273/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
274///
275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
276#[inline]
277#[target_feature(enable = "avx512f")]
278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
279#[cfg_attr(test, assert_instr(vpandd))]
280#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
281pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
282 unsafe { simd_select_bitmask(m:k, yes:simd_fabs(v2), no:src) }
283}
284
285/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
286///
287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
288#[inline]
289#[target_feature(enable = "avx512f")]
290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
291#[cfg_attr(test, assert_instr(vpandq))]
292#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
293pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
294 unsafe { simd_fabs(v2) }
295}
296
297/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
298///
299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
300#[inline]
301#[target_feature(enable = "avx512f")]
302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
303#[cfg_attr(test, assert_instr(vpandq))]
304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
305pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
306 unsafe { simd_select_bitmask(m:k, yes:simd_fabs(v2), no:src) }
307}
308
309/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
312#[inline]
313#[target_feature(enable = "avx512f")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vmovdqa32))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
318 unsafe {
319 let mov: Simd = a.as_i32x16();
320 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x16()))
321 }
322}
323
324/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
327#[inline]
328#[target_feature(enable = "avx512f")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vmovdqa32))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
333 unsafe {
334 let mov: Simd = a.as_i32x16();
335 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x16::ZERO))
336 }
337}
338
339/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
340///
341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
342#[inline]
343#[target_feature(enable = "avx512f,avx512vl")]
344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
345#[cfg_attr(test, assert_instr(vmovdqa32))]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
348 unsafe {
349 let mov: Simd = a.as_i32x8();
350 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x8()))
351 }
352}
353
354/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
355///
356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
357#[inline]
358#[target_feature(enable = "avx512f,avx512vl")]
359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
360#[cfg_attr(test, assert_instr(vmovdqa32))]
361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
362pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
363 unsafe {
364 let mov: Simd = a.as_i32x8();
365 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x8::ZERO))
366 }
367}
368
369/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
370///
371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
372#[inline]
373#[target_feature(enable = "avx512f,avx512vl")]
374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
375#[cfg_attr(test, assert_instr(vmovdqa32))]
376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
377pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
378 unsafe {
379 let mov: Simd = a.as_i32x4();
380 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i32x4()))
381 }
382}
383
384/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
385///
386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
387#[inline]
388#[target_feature(enable = "avx512f,avx512vl")]
389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
390#[cfg_attr(test, assert_instr(vmovdqa32))]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
393 unsafe {
394 let mov: Simd = a.as_i32x4();
395 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i32x4::ZERO))
396 }
397}
398
399/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
400///
401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
402#[inline]
403#[target_feature(enable = "avx512f")]
404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
405#[cfg_attr(test, assert_instr(vmovdqa64))]
406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
407pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
408 unsafe {
409 let mov: Simd = a.as_i64x8();
410 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x8()))
411 }
412}
413
414/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
415///
416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
417#[inline]
418#[target_feature(enable = "avx512f")]
419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
420#[cfg_attr(test, assert_instr(vmovdqa64))]
421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
422pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
423 unsafe {
424 let mov: Simd = a.as_i64x8();
425 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x8::ZERO))
426 }
427}
428
429/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
430///
431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
432#[inline]
433#[target_feature(enable = "avx512f,avx512vl")]
434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
435#[cfg_attr(test, assert_instr(vmovdqa64))]
436#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
437pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
438 unsafe {
439 let mov: Simd = a.as_i64x4();
440 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x4()))
441 }
442}
443
444/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
445///
446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
447#[inline]
448#[target_feature(enable = "avx512f,avx512vl")]
449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
450#[cfg_attr(test, assert_instr(vmovdqa64))]
451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
452pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
453 unsafe {
454 let mov: Simd = a.as_i64x4();
455 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x4::ZERO))
456 }
457}
458
459/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
460///
461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
462#[inline]
463#[target_feature(enable = "avx512f,avx512vl")]
464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
465#[cfg_attr(test, assert_instr(vmovdqa64))]
466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
467pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
468 unsafe {
469 let mov: Simd = a.as_i64x2();
470 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_i64x2()))
471 }
472}
473
474/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
475///
476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
477#[inline]
478#[target_feature(enable = "avx512f,avx512vl")]
479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
480#[cfg_attr(test, assert_instr(vmovdqa64))]
481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
482pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
483 unsafe {
484 let mov: Simd = a.as_i64x2();
485 transmute(src:simd_select_bitmask(m:k, yes:mov, no:i64x2::ZERO))
486 }
487}
488
489/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
490///
491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
492#[inline]
493#[target_feature(enable = "avx512f")]
494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
495#[cfg_attr(test, assert_instr(vmovaps))]
496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
497pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
498 unsafe {
499 let mov: Simd = a.as_f32x16();
500 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
501 }
502}
503
504/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
505///
506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
507#[inline]
508#[target_feature(enable = "avx512f")]
509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
510#[cfg_attr(test, assert_instr(vmovaps))]
511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
512pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
513 unsafe {
514 let mov: Simd = a.as_f32x16();
515 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
516 }
517}
518
519/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
520///
521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
522#[inline]
523#[target_feature(enable = "avx512f,avx512vl")]
524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
525#[cfg_attr(test, assert_instr(vmovaps))]
526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
527pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
528 unsafe {
529 let mov: Simd = a.as_f32x8();
530 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x8()))
531 }
532}
533
534/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
535///
536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
537#[inline]
538#[target_feature(enable = "avx512f,avx512vl")]
539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
540#[cfg_attr(test, assert_instr(vmovaps))]
541#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
542pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
543 unsafe {
544 let mov: Simd = a.as_f32x8();
545 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x8::ZERO))
546 }
547}
548
549/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
550///
551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
552#[inline]
553#[target_feature(enable = "avx512f,avx512vl")]
554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
555#[cfg_attr(test, assert_instr(vmovaps))]
556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
557pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
558 unsafe {
559 let mov: Simd = a.as_f32x4();
560 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x4()))
561 }
562}
563
564/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
565///
566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
567#[inline]
568#[target_feature(enable = "avx512f,avx512vl")]
569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
570#[cfg_attr(test, assert_instr(vmovaps))]
571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
572pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
573 unsafe {
574 let mov: Simd = a.as_f32x4();
575 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x4::ZERO))
576 }
577}
578
579/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
580///
581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
582#[inline]
583#[target_feature(enable = "avx512f")]
584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
585#[cfg_attr(test, assert_instr(vmovapd))]
586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
587pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
588 unsafe {
589 let mov: Simd = a.as_f64x8();
590 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x8()))
591 }
592}
593
594/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
595///
596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
597#[inline]
598#[target_feature(enable = "avx512f")]
599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
600#[cfg_attr(test, assert_instr(vmovapd))]
601#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
602pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
603 unsafe {
604 let mov: Simd = a.as_f64x8();
605 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x8::ZERO))
606 }
607}
608
609/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
610///
611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
612#[inline]
613#[target_feature(enable = "avx512f,avx512vl")]
614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
615#[cfg_attr(test, assert_instr(vmovapd))]
616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
617pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
618 unsafe {
619 let mov: Simd = a.as_f64x4();
620 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x4()))
621 }
622}
623
624/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
625///
626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
627#[inline]
628#[target_feature(enable = "avx512f,avx512vl")]
629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
630#[cfg_attr(test, assert_instr(vmovapd))]
631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
632pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
633 unsafe {
634 let mov: Simd = a.as_f64x4();
635 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x4::ZERO))
636 }
637}
638
639/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
640///
641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
642#[inline]
643#[target_feature(enable = "avx512f,avx512vl")]
644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
645#[cfg_attr(test, assert_instr(vmovapd))]
646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
647pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
648 unsafe {
649 let mov: Simd = a.as_f64x2();
650 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x2()))
651 }
652}
653
654/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
655///
656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
657#[inline]
658#[target_feature(enable = "avx512f,avx512vl")]
659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
660#[cfg_attr(test, assert_instr(vmovapd))]
661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
662pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
663 unsafe {
664 let mov: Simd = a.as_f64x2();
665 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x2::ZERO))
666 }
667}
668
669/// Add packed 32-bit integers in a and b, and store the results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
672#[inline]
673#[target_feature(enable = "avx512f")]
674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
675#[cfg_attr(test, assert_instr(vpaddd))]
676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
677pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
678 unsafe { transmute(src:simd_add(x:a.as_i32x16(), y:b.as_i32x16())) }
679}
680
681/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
682///
683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
684#[inline]
685#[target_feature(enable = "avx512f")]
686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
687#[cfg_attr(test, assert_instr(vpaddd))]
688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
689pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
690 unsafe {
691 let add: Simd = _mm512_add_epi32(a, b).as_i32x16();
692 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x16()))
693 }
694}
695
696/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
697///
698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
699#[inline]
700#[target_feature(enable = "avx512f")]
701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
702#[cfg_attr(test, assert_instr(vpaddd))]
703#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
704pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
705 unsafe {
706 let add: Simd = _mm512_add_epi32(a, b).as_i32x16();
707 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x16::ZERO))
708 }
709}
710
711/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
712///
713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
714#[inline]
715#[target_feature(enable = "avx512f,avx512vl")]
716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
717#[cfg_attr(test, assert_instr(vpaddd))]
718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
719pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
720 unsafe {
721 let add: Simd = _mm256_add_epi32(a, b).as_i32x8();
722 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x8()))
723 }
724}
725
726/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
727///
728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
729#[inline]
730#[target_feature(enable = "avx512f,avx512vl")]
731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
732#[cfg_attr(test, assert_instr(vpaddd))]
733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
734pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
735 unsafe {
736 let add: Simd = _mm256_add_epi32(a, b).as_i32x8();
737 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x8::ZERO))
738 }
739}
740
741/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
742///
743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
744#[inline]
745#[target_feature(enable = "avx512f,avx512vl")]
746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
747#[cfg_attr(test, assert_instr(vpaddd))]
748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
749pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
750 unsafe {
751 let add: Simd = _mm_add_epi32(a, b).as_i32x4();
752 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i32x4()))
753 }
754}
755
756/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
757///
758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
759#[inline]
760#[target_feature(enable = "avx512f,avx512vl")]
761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
762#[cfg_attr(test, assert_instr(vpaddd))]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
765 unsafe {
766 let add: Simd = _mm_add_epi32(a, b).as_i32x4();
767 transmute(src:simd_select_bitmask(m:k, yes:add, no:i32x4::ZERO))
768 }
769}
770
771/// Add packed 64-bit integers in a and b, and store the results in dst.
772///
773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
774#[inline]
775#[target_feature(enable = "avx512f")]
776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
777#[cfg_attr(test, assert_instr(vpaddq))]
778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
779pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
780 unsafe { transmute(src:simd_add(x:a.as_i64x8(), y:b.as_i64x8())) }
781}
782
783/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
784///
785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
786#[inline]
787#[target_feature(enable = "avx512f")]
788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
789#[cfg_attr(test, assert_instr(vpaddq))]
790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
791pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
792 unsafe {
793 let add: Simd = _mm512_add_epi64(a, b).as_i64x8();
794 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x8()))
795 }
796}
797
798/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
799///
800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
801#[inline]
802#[target_feature(enable = "avx512f")]
803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
804#[cfg_attr(test, assert_instr(vpaddq))]
805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
806pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
807 unsafe {
808 let add: Simd = _mm512_add_epi64(a, b).as_i64x8();
809 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x8::ZERO))
810 }
811}
812
813/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
816#[inline]
817#[target_feature(enable = "avx512f,avx512vl")]
818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
819#[cfg_attr(test, assert_instr(vpaddq))]
820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
821pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
822 unsafe {
823 let add: Simd = _mm256_add_epi64(a, b).as_i64x4();
824 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x4()))
825 }
826}
827
828/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
829///
830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
831#[inline]
832#[target_feature(enable = "avx512f,avx512vl")]
833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
834#[cfg_attr(test, assert_instr(vpaddq))]
835#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
836pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
837 unsafe {
838 let add: Simd = _mm256_add_epi64(a, b).as_i64x4();
839 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x4::ZERO))
840 }
841}
842
843/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
844///
845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
846#[inline]
847#[target_feature(enable = "avx512f,avx512vl")]
848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
849#[cfg_attr(test, assert_instr(vpaddq))]
850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
851pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
852 unsafe {
853 let add: Simd = _mm_add_epi64(a, b).as_i64x2();
854 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_i64x2()))
855 }
856}
857
858/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
859///
860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
861#[inline]
862#[target_feature(enable = "avx512f,avx512vl")]
863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
864#[cfg_attr(test, assert_instr(vpaddq))]
865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
866pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
867 unsafe {
868 let add: Simd = _mm_add_epi64(a, b).as_i64x2();
869 transmute(src:simd_select_bitmask(m:k, yes:add, no:i64x2::ZERO))
870 }
871}
872
873/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
874///
875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
876#[inline]
877#[target_feature(enable = "avx512f")]
878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
879#[cfg_attr(test, assert_instr(vaddps))]
880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
881pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
882 unsafe { transmute(src:simd_add(x:a.as_f32x16(), y:b.as_f32x16())) }
883}
884
885/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
886///
887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
888#[inline]
889#[target_feature(enable = "avx512f")]
890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
891#[cfg_attr(test, assert_instr(vaddps))]
892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
893pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
894 unsafe {
895 let add: Simd = _mm512_add_ps(a, b).as_f32x16();
896 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x16()))
897 }
898}
899
900/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
901///
902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
903#[inline]
904#[target_feature(enable = "avx512f")]
905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
906#[cfg_attr(test, assert_instr(vaddps))]
907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
908pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
909 unsafe {
910 let add: Simd = _mm512_add_ps(a, b).as_f32x16();
911 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x16::ZERO))
912 }
913}
914
915/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
916///
917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
918#[inline]
919#[target_feature(enable = "avx512f,avx512vl")]
920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
921#[cfg_attr(test, assert_instr(vaddps))]
922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
923pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
924 unsafe {
925 let add: Simd = _mm256_add_ps(a, b).as_f32x8();
926 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x8()))
927 }
928}
929
930/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
931///
932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
933#[inline]
934#[target_feature(enable = "avx512f,avx512vl")]
935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
936#[cfg_attr(test, assert_instr(vaddps))]
937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
938pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
939 unsafe {
940 let add: Simd = _mm256_add_ps(a, b).as_f32x8();
941 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x8::ZERO))
942 }
943}
944
945/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
946///
947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
948#[inline]
949#[target_feature(enable = "avx512f,avx512vl")]
950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
951#[cfg_attr(test, assert_instr(vaddps))]
952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
953pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
954 unsafe {
955 let add: Simd = _mm_add_ps(a, b).as_f32x4();
956 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f32x4()))
957 }
958}
959
960/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
961///
962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
963#[inline]
964#[target_feature(enable = "avx512f,avx512vl")]
965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
966#[cfg_attr(test, assert_instr(vaddps))]
967#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
968pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
969 unsafe {
970 let add: Simd = _mm_add_ps(a, b).as_f32x4();
971 transmute(src:simd_select_bitmask(m:k, yes:add, no:f32x4::ZERO))
972 }
973}
974
975/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
976///
977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
978#[inline]
979#[target_feature(enable = "avx512f")]
980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
981#[cfg_attr(test, assert_instr(vaddpd))]
982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
983pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
984 unsafe { transmute(src:simd_add(x:a.as_f64x8(), y:b.as_f64x8())) }
985}
986
987/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
988///
989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
990#[inline]
991#[target_feature(enable = "avx512f")]
992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
993#[cfg_attr(test, assert_instr(vaddpd))]
994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
995pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
996 unsafe {
997 let add: Simd = _mm512_add_pd(a, b).as_f64x8();
998 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x8()))
999 }
1000}
1001
1002/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1003///
1004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
1005#[inline]
1006#[target_feature(enable = "avx512f")]
1007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1008#[cfg_attr(test, assert_instr(vaddpd))]
1009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1010pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1011 unsafe {
1012 let add: Simd = _mm512_add_pd(a, b).as_f64x8();
1013 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x8::ZERO))
1014 }
1015}
1016
1017/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1018///
1019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
1020#[inline]
1021#[target_feature(enable = "avx512f,avx512vl")]
1022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1023#[cfg_attr(test, assert_instr(vaddpd))]
1024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1025pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1026 unsafe {
1027 let add: Simd = _mm256_add_pd(a, b).as_f64x4();
1028 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x4()))
1029 }
1030}
1031
1032/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1033///
1034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
1035#[inline]
1036#[target_feature(enable = "avx512f,avx512vl")]
1037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1038#[cfg_attr(test, assert_instr(vaddpd))]
1039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1040pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1041 unsafe {
1042 let add: Simd = _mm256_add_pd(a, b).as_f64x4();
1043 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x4::ZERO))
1044 }
1045}
1046
1047/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1048///
1049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
1050#[inline]
1051#[target_feature(enable = "avx512f,avx512vl")]
1052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1053#[cfg_attr(test, assert_instr(vaddpd))]
1054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1055pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1056 unsafe {
1057 let add: Simd = _mm_add_pd(a, b).as_f64x2();
1058 transmute(src:simd_select_bitmask(m:k, yes:add, no:src.as_f64x2()))
1059 }
1060}
1061
1062/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
1065#[inline]
1066#[target_feature(enable = "avx512f,avx512vl")]
1067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1068#[cfg_attr(test, assert_instr(vaddpd))]
1069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1070pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1071 unsafe {
1072 let add: Simd = _mm_add_pd(a, b).as_f64x2();
1073 transmute(src:simd_select_bitmask(m:k, yes:add, no:f64x2::ZERO))
1074 }
1075}
1076
1077/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
1078///
1079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
1080#[inline]
1081#[target_feature(enable = "avx512f")]
1082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1083#[cfg_attr(test, assert_instr(vpsubd))]
1084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1085pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1086 unsafe { transmute(src:simd_sub(lhs:a.as_i32x16(), rhs:b.as_i32x16())) }
1087}
1088
1089/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1090///
1091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1092#[inline]
1093#[target_feature(enable = "avx512f")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[cfg_attr(test, assert_instr(vpsubd))]
1096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1097pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1098 unsafe {
1099 let sub: Simd = _mm512_sub_epi32(a, b).as_i32x16();
1100 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x16()))
1101 }
1102}
1103
1104/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1105///
1106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1107#[inline]
1108#[target_feature(enable = "avx512f")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[cfg_attr(test, assert_instr(vpsubd))]
1111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1112pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1113 unsafe {
1114 let sub: Simd = _mm512_sub_epi32(a, b).as_i32x16();
1115 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x16::ZERO))
1116 }
1117}
1118
1119/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1122#[inline]
1123#[target_feature(enable = "avx512f,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpsubd))]
1126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1127pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1128 unsafe {
1129 let sub: Simd = _mm256_sub_epi32(a, b).as_i32x8();
1130 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x8()))
1131 }
1132}
1133
1134/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1137#[inline]
1138#[target_feature(enable = "avx512f,avx512vl")]
1139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1140#[cfg_attr(test, assert_instr(vpsubd))]
1141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1142pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1143 unsafe {
1144 let sub: Simd = _mm256_sub_epi32(a, b).as_i32x8();
1145 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x8::ZERO))
1146 }
1147}
1148
1149/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1150///
1151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1152#[inline]
1153#[target_feature(enable = "avx512f,avx512vl")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[cfg_attr(test, assert_instr(vpsubd))]
1156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1157pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1158 unsafe {
1159 let sub: Simd = _mm_sub_epi32(a, b).as_i32x4();
1160 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x4()))
1161 }
1162}
1163
1164/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1167#[inline]
1168#[target_feature(enable = "avx512f,avx512vl")]
1169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1170#[cfg_attr(test, assert_instr(vpsubd))]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1173 unsafe {
1174 let sub: Simd = _mm_sub_epi32(a, b).as_i32x4();
1175 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x4::ZERO))
1176 }
1177}
1178
1179/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1180///
1181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1182#[inline]
1183#[target_feature(enable = "avx512f")]
1184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1185#[cfg_attr(test, assert_instr(vpsubq))]
1186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1187pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1188 unsafe { transmute(src:simd_sub(lhs:a.as_i64x8(), rhs:b.as_i64x8())) }
1189}
1190
1191/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1192///
1193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1194#[inline]
1195#[target_feature(enable = "avx512f")]
1196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1197#[cfg_attr(test, assert_instr(vpsubq))]
1198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1199pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1200 unsafe {
1201 let sub: Simd = _mm512_sub_epi64(a, b).as_i64x8();
1202 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x8()))
1203 }
1204}
1205
1206/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vpsubq))]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1215 unsafe {
1216 let sub: Simd = _mm512_sub_epi64(a, b).as_i64x8();
1217 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x8::ZERO))
1218 }
1219}
1220
1221/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1222///
1223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1224#[inline]
1225#[target_feature(enable = "avx512f,avx512vl")]
1226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1227#[cfg_attr(test, assert_instr(vpsubq))]
1228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1229pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1230 unsafe {
1231 let sub: Simd = _mm256_sub_epi64(a, b).as_i64x4();
1232 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x4()))
1233 }
1234}
1235
1236/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1237///
1238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1239#[inline]
1240#[target_feature(enable = "avx512f,avx512vl")]
1241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1242#[cfg_attr(test, assert_instr(vpsubq))]
1243#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1244pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1245 unsafe {
1246 let sub: Simd = _mm256_sub_epi64(a, b).as_i64x4();
1247 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x4::ZERO))
1248 }
1249}
1250
1251/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1252///
1253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1254#[inline]
1255#[target_feature(enable = "avx512f,avx512vl")]
1256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1257#[cfg_attr(test, assert_instr(vpsubq))]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1260 unsafe {
1261 let sub: Simd = _mm_sub_epi64(a, b).as_i64x2();
1262 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x2()))
1263 }
1264}
1265
1266/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1267///
1268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1269#[inline]
1270#[target_feature(enable = "avx512f,avx512vl")]
1271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1272#[cfg_attr(test, assert_instr(vpsubq))]
1273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1274pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1275 unsafe {
1276 let sub: Simd = _mm_sub_epi64(a, b).as_i64x2();
1277 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i64x2::ZERO))
1278 }
1279}
1280
1281/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1284#[inline]
1285#[target_feature(enable = "avx512f")]
1286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1287#[cfg_attr(test, assert_instr(vsubps))]
1288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1289pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1290 unsafe { transmute(src:simd_sub(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
1291}
1292
1293/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1296#[inline]
1297#[target_feature(enable = "avx512f")]
1298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1299#[cfg_attr(test, assert_instr(vsubps))]
1300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1301pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1302 unsafe {
1303 let sub: Simd = _mm512_sub_ps(a, b).as_f32x16();
1304 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x16()))
1305 }
1306}
1307
1308/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1309///
1310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1311#[inline]
1312#[target_feature(enable = "avx512f")]
1313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1314#[cfg_attr(test, assert_instr(vsubps))]
1315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1316pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1317 unsafe {
1318 let sub: Simd = _mm512_sub_ps(a, b).as_f32x16();
1319 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x16::ZERO))
1320 }
1321}
1322
1323/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1324///
1325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1326#[inline]
1327#[target_feature(enable = "avx512f,avx512vl")]
1328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1329#[cfg_attr(test, assert_instr(vsubps))]
1330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1331pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1332 unsafe {
1333 let sub: Simd = _mm256_sub_ps(a, b).as_f32x8();
1334 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x8()))
1335 }
1336}
1337
1338/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1339///
1340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1341#[inline]
1342#[target_feature(enable = "avx512f,avx512vl")]
1343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1344#[cfg_attr(test, assert_instr(vsubps))]
1345#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1346pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1347 unsafe {
1348 let sub: Simd = _mm256_sub_ps(a, b).as_f32x8();
1349 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x8::ZERO))
1350 }
1351}
1352
1353/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1354///
1355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1356#[inline]
1357#[target_feature(enable = "avx512f,avx512vl")]
1358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1359#[cfg_attr(test, assert_instr(vsubps))]
1360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1361pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1362 unsafe {
1363 let sub: Simd = _mm_sub_ps(a, b).as_f32x4();
1364 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x4()))
1365 }
1366}
1367
1368/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1369///
1370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1371#[inline]
1372#[target_feature(enable = "avx512f,avx512vl")]
1373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1374#[cfg_attr(test, assert_instr(vsubps))]
1375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1376pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1377 unsafe {
1378 let sub: Simd = _mm_sub_ps(a, b).as_f32x4();
1379 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x4::ZERO))
1380 }
1381}
1382
1383/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1384///
1385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1386#[inline]
1387#[target_feature(enable = "avx512f")]
1388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1389#[cfg_attr(test, assert_instr(vsubpd))]
1390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1391pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1392 unsafe { transmute(src:simd_sub(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
1393}
1394
1395/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1398#[inline]
1399#[target_feature(enable = "avx512f")]
1400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1401#[cfg_attr(test, assert_instr(vsubpd))]
1402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1403pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1404 unsafe {
1405 let sub: Simd = _mm512_sub_pd(a, b).as_f64x8();
1406 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x8()))
1407 }
1408}
1409
1410/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1413#[inline]
1414#[target_feature(enable = "avx512f")]
1415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1416#[cfg_attr(test, assert_instr(vsubpd))]
1417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1418pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1419 unsafe {
1420 let sub: Simd = _mm512_sub_pd(a, b).as_f64x8();
1421 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x8::ZERO))
1422 }
1423}
1424
1425/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1426///
1427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1428#[inline]
1429#[target_feature(enable = "avx512f,avx512vl")]
1430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1431#[cfg_attr(test, assert_instr(vsubpd))]
1432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1433pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1434 unsafe {
1435 let sub: Simd = _mm256_sub_pd(a, b).as_f64x4();
1436 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x4()))
1437 }
1438}
1439
1440/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1441///
1442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1443#[inline]
1444#[target_feature(enable = "avx512f,avx512vl")]
1445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1446#[cfg_attr(test, assert_instr(vsubpd))]
1447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1448pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1449 unsafe {
1450 let sub: Simd = _mm256_sub_pd(a, b).as_f64x4();
1451 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x4::ZERO))
1452 }
1453}
1454
1455/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1458#[inline]
1459#[target_feature(enable = "avx512f,avx512vl")]
1460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1461#[cfg_attr(test, assert_instr(vsubpd))]
1462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1463pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1464 unsafe {
1465 let sub: Simd = _mm_sub_pd(a, b).as_f64x2();
1466 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x2()))
1467 }
1468}
1469
1470/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1476#[cfg_attr(test, assert_instr(vsubpd))]
1477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1478pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1479 unsafe {
1480 let sub: Simd = _mm_sub_pd(a, b).as_f64x2();
1481 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x2::ZERO))
1482 }
1483}
1484
1485/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1486///
1487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1488#[inline]
1489#[target_feature(enable = "avx512f")]
1490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1491#[cfg_attr(test, assert_instr(vpmuldq))]
1492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1493pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1494 unsafe {
1495 let a: Simd = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1496 let b: Simd = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1497 transmute(src:simd_mul(x:a, y:b))
1498 }
1499}
1500
1501/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1504#[inline]
1505#[target_feature(enable = "avx512f")]
1506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1507#[cfg_attr(test, assert_instr(vpmuldq))]
1508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1509pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1510 unsafe {
1511 let mul: Simd = _mm512_mul_epi32(a, b).as_i64x8();
1512 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x8()))
1513 }
1514}
1515
1516/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1517///
1518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1519#[inline]
1520#[target_feature(enable = "avx512f")]
1521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1522#[cfg_attr(test, assert_instr(vpmuldq))]
1523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1524pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1525 unsafe {
1526 let mul: Simd = _mm512_mul_epi32(a, b).as_i64x8();
1527 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x8::ZERO))
1528 }
1529}
1530
1531/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1534#[inline]
1535#[target_feature(enable = "avx512f,avx512vl")]
1536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1537#[cfg_attr(test, assert_instr(vpmuldq))]
1538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1539pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1540 unsafe {
1541 let mul: Simd = _mm256_mul_epi32(a, b).as_i64x4();
1542 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x4()))
1543 }
1544}
1545
1546/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1547///
1548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1549#[inline]
1550#[target_feature(enable = "avx512f,avx512vl")]
1551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1552#[cfg_attr(test, assert_instr(vpmuldq))]
1553#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1554pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1555 unsafe {
1556 let mul: Simd = _mm256_mul_epi32(a, b).as_i64x4();
1557 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x4::ZERO))
1558 }
1559}
1560
1561/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1562///
1563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1564#[inline]
1565#[target_feature(enable = "avx512f,avx512vl")]
1566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1567#[cfg_attr(test, assert_instr(vpmuldq))]
1568#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1569pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1570 unsafe {
1571 let mul: Simd = _mm_mul_epi32(a, b).as_i64x2();
1572 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x2()))
1573 }
1574}
1575
1576/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1577///
1578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1579#[inline]
1580#[target_feature(enable = "avx512f,avx512vl")]
1581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1582#[cfg_attr(test, assert_instr(vpmuldq))]
1583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1584pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1585 unsafe {
1586 let mul: Simd = _mm_mul_epi32(a, b).as_i64x2();
1587 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i64x2::ZERO))
1588 }
1589}
1590
1591/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1594#[inline]
1595#[target_feature(enable = "avx512f")]
1596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1597#[cfg_attr(test, assert_instr(vpmulld))]
1598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1599pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1600 unsafe { transmute(src:simd_mul(x:a.as_i32x16(), y:b.as_i32x16())) }
1601}
1602
1603/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1604///
1605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1606#[inline]
1607#[target_feature(enable = "avx512f")]
1608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1609#[cfg_attr(test, assert_instr(vpmulld))]
1610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1611pub const fn _mm512_mask_mullo_epi32(
1612 src: __m512i,
1613 k: __mmask16,
1614 a: __m512i,
1615 b: __m512i,
1616) -> __m512i {
1617 unsafe {
1618 let mul: Simd = _mm512_mullo_epi32(a, b).as_i32x16();
1619 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x16()))
1620 }
1621}
1622
1623/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1624///
1625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1626#[inline]
1627#[target_feature(enable = "avx512f")]
1628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1629#[cfg_attr(test, assert_instr(vpmulld))]
1630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1631pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1632 unsafe {
1633 let mul: Simd = _mm512_mullo_epi32(a, b).as_i32x16();
1634 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x16::ZERO))
1635 }
1636}
1637
1638/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1639///
1640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1641#[inline]
1642#[target_feature(enable = "avx512f,avx512vl")]
1643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1644#[cfg_attr(test, assert_instr(vpmulld))]
1645#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1646pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1647 unsafe {
1648 let mul: Simd = _mm256_mullo_epi32(a, b).as_i32x8();
1649 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x8()))
1650 }
1651}
1652
1653/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1654///
1655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1656#[inline]
1657#[target_feature(enable = "avx512f,avx512vl")]
1658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1659#[cfg_attr(test, assert_instr(vpmulld))]
1660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1661pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1662 unsafe {
1663 let mul: Simd = _mm256_mullo_epi32(a, b).as_i32x8();
1664 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x8::ZERO))
1665 }
1666}
1667
1668/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1671#[inline]
1672#[target_feature(enable = "avx512f,avx512vl")]
1673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1674#[cfg_attr(test, assert_instr(vpmulld))]
1675#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1676pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1677 unsafe {
1678 let mul: Simd = _mm_mullo_epi32(a, b).as_i32x4();
1679 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i32x4()))
1680 }
1681}
1682
1683/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1684///
1685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1686#[inline]
1687#[target_feature(enable = "avx512f,avx512vl")]
1688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1689#[cfg_attr(test, assert_instr(vpmulld))]
1690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1691pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1692 unsafe {
1693 let mul: Simd = _mm_mullo_epi32(a, b).as_i32x4();
1694 transmute(src:simd_select_bitmask(m:k, yes:mul, no:i32x4::ZERO))
1695 }
1696}
1697
1698/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1699///
1700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1701///
1702/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1703#[inline]
1704#[target_feature(enable = "avx512f")]
1705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1707pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1708 unsafe { transmute(src:simd_mul(x:a.as_i64x8(), y:b.as_i64x8())) }
1709}
1710
1711/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1712///
1713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1714///
1715/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1716#[inline]
1717#[target_feature(enable = "avx512f")]
1718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1719#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1720pub const fn _mm512_mask_mullox_epi64(
1721 src: __m512i,
1722 k: __mmask8,
1723 a: __m512i,
1724 b: __m512i,
1725) -> __m512i {
1726 unsafe {
1727 let mul: Simd = _mm512_mullox_epi64(a, b).as_i64x8();
1728 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_i64x8()))
1729 }
1730}
1731
1732/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1733///
1734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1735#[inline]
1736#[target_feature(enable = "avx512f")]
1737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1738#[cfg_attr(test, assert_instr(vpmuludq))]
1739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1740pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1741 unsafe {
1742 let a: Simd = a.as_u64x8();
1743 let b: Simd = b.as_u64x8();
1744 let mask: Simd = u64x8::splat(u32::MAX as u64);
1745 transmute(src:simd_mul(x:simd_and(a, mask), y:simd_and(x:b, y:mask)))
1746 }
1747}
1748
1749/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1752#[inline]
1753#[target_feature(enable = "avx512f")]
1754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1755#[cfg_attr(test, assert_instr(vpmuludq))]
1756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1757pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1758 unsafe {
1759 let mul: Simd = _mm512_mul_epu32(a, b).as_u64x8();
1760 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x8()))
1761 }
1762}
1763
1764/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1765///
1766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1767#[inline]
1768#[target_feature(enable = "avx512f")]
1769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1770#[cfg_attr(test, assert_instr(vpmuludq))]
1771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1772pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1773 unsafe {
1774 let mul: Simd = _mm512_mul_epu32(a, b).as_u64x8();
1775 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x8::ZERO))
1776 }
1777}
1778
1779/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1780///
1781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1782#[inline]
1783#[target_feature(enable = "avx512f,avx512vl")]
1784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1785#[cfg_attr(test, assert_instr(vpmuludq))]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1788 unsafe {
1789 let mul: Simd = _mm256_mul_epu32(a, b).as_u64x4();
1790 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x4()))
1791 }
1792}
1793
1794/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1795///
1796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1797#[inline]
1798#[target_feature(enable = "avx512f,avx512vl")]
1799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1800#[cfg_attr(test, assert_instr(vpmuludq))]
1801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1802pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1803 unsafe {
1804 let mul: Simd = _mm256_mul_epu32(a, b).as_u64x4();
1805 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x4::ZERO))
1806 }
1807}
1808
1809/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1810///
1811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1812#[inline]
1813#[target_feature(enable = "avx512f,avx512vl")]
1814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1815#[cfg_attr(test, assert_instr(vpmuludq))]
1816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1817pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1818 unsafe {
1819 let mul: Simd = _mm_mul_epu32(a, b).as_u64x2();
1820 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_u64x2()))
1821 }
1822}
1823
1824/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1825///
1826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1827#[inline]
1828#[target_feature(enable = "avx512f,avx512vl")]
1829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1830#[cfg_attr(test, assert_instr(vpmuludq))]
1831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1832pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1833 unsafe {
1834 let mul: Simd = _mm_mul_epu32(a, b).as_u64x2();
1835 transmute(src:simd_select_bitmask(m:k, yes:mul, no:u64x2::ZERO))
1836 }
1837}
1838
1839/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1840///
1841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1842#[inline]
1843#[target_feature(enable = "avx512f")]
1844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1845#[cfg_attr(test, assert_instr(vmulps))]
1846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1847pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1848 unsafe { transmute(src:simd_mul(x:a.as_f32x16(), y:b.as_f32x16())) }
1849}
1850
1851/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1852///
1853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1854#[inline]
1855#[target_feature(enable = "avx512f")]
1856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1857#[cfg_attr(test, assert_instr(vmulps))]
1858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1859pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1860 unsafe {
1861 let mul: Simd = _mm512_mul_ps(a, b).as_f32x16();
1862 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x16()))
1863 }
1864}
1865
1866/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1869#[inline]
1870#[target_feature(enable = "avx512f")]
1871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1872#[cfg_attr(test, assert_instr(vmulps))]
1873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1874pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1875 unsafe {
1876 let mul: Simd = _mm512_mul_ps(a, b).as_f32x16();
1877 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x16::ZERO))
1878 }
1879}
1880
1881/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1882///
1883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1884#[inline]
1885#[target_feature(enable = "avx512f,avx512vl")]
1886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1887#[cfg_attr(test, assert_instr(vmulps))]
1888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1889pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1890 unsafe {
1891 let mul: Simd = _mm256_mul_ps(a, b).as_f32x8();
1892 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x8()))
1893 }
1894}
1895
1896/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1899#[inline]
1900#[target_feature(enable = "avx512f,avx512vl")]
1901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1902#[cfg_attr(test, assert_instr(vmulps))]
1903#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1904pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1905 unsafe {
1906 let mul: Simd = _mm256_mul_ps(a, b).as_f32x8();
1907 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x8::ZERO))
1908 }
1909}
1910
1911/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1912///
1913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1914#[inline]
1915#[target_feature(enable = "avx512f,avx512vl")]
1916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1917#[cfg_attr(test, assert_instr(vmulps))]
1918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1919pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1920 unsafe {
1921 let mul: Simd = _mm_mul_ps(a, b).as_f32x4();
1922 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f32x4()))
1923 }
1924}
1925
1926/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1927///
1928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1929#[inline]
1930#[target_feature(enable = "avx512f,avx512vl")]
1931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1932#[cfg_attr(test, assert_instr(vmulps))]
1933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1934pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1935 unsafe {
1936 let mul: Simd = _mm_mul_ps(a, b).as_f32x4();
1937 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f32x4::ZERO))
1938 }
1939}
1940
1941/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1942///
1943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1944#[inline]
1945#[target_feature(enable = "avx512f")]
1946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1947#[cfg_attr(test, assert_instr(vmulpd))]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1950 unsafe { transmute(src:simd_mul(x:a.as_f64x8(), y:b.as_f64x8())) }
1951}
1952
1953/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1954///
1955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1956#[inline]
1957#[target_feature(enable = "avx512f")]
1958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1959#[cfg_attr(test, assert_instr(vmulpd))]
1960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1961pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1962 unsafe {
1963 let mul: Simd = _mm512_mul_pd(a, b).as_f64x8();
1964 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x8()))
1965 }
1966}
1967
1968/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1969///
1970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1971#[inline]
1972#[target_feature(enable = "avx512f")]
1973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1974#[cfg_attr(test, assert_instr(vmulpd))]
1975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1976pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1977 unsafe {
1978 let mul: Simd = _mm512_mul_pd(a, b).as_f64x8();
1979 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x8::ZERO))
1980 }
1981}
1982
1983/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1984///
1985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1986#[inline]
1987#[target_feature(enable = "avx512f,avx512vl")]
1988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1989#[cfg_attr(test, assert_instr(vmulpd))]
1990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1991pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1992 unsafe {
1993 let mul: Simd = _mm256_mul_pd(a, b).as_f64x4();
1994 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x4()))
1995 }
1996}
1997
1998/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1999///
2000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
2001#[inline]
2002#[target_feature(enable = "avx512f,avx512vl")]
2003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2004#[cfg_attr(test, assert_instr(vmulpd))]
2005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2006pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2007 unsafe {
2008 let mul: Simd = _mm256_mul_pd(a, b).as_f64x4();
2009 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x4::ZERO))
2010 }
2011}
2012
2013/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2014///
2015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
2016#[inline]
2017#[target_feature(enable = "avx512f,avx512vl")]
2018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2019#[cfg_attr(test, assert_instr(vmulpd))]
2020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2021pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2022 unsafe {
2023 let mul: Simd = _mm_mul_pd(a, b).as_f64x2();
2024 transmute(src:simd_select_bitmask(m:k, yes:mul, no:src.as_f64x2()))
2025 }
2026}
2027
2028/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2029///
2030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
2031#[inline]
2032#[target_feature(enable = "avx512f,avx512vl")]
2033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2034#[cfg_attr(test, assert_instr(vmulpd))]
2035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2036pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2037 unsafe {
2038 let mul: Simd = _mm_mul_pd(a, b).as_f64x2();
2039 transmute(src:simd_select_bitmask(m:k, yes:mul, no:f64x2::ZERO))
2040 }
2041}
2042
2043/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2044///
2045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
2046#[inline]
2047#[target_feature(enable = "avx512f")]
2048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2049#[cfg_attr(test, assert_instr(vdivps))]
2050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2051pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
2052 unsafe { transmute(src:simd_div(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
2053}
2054
2055/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2056///
2057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
2058#[inline]
2059#[target_feature(enable = "avx512f")]
2060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2061#[cfg_attr(test, assert_instr(vdivps))]
2062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2063pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2064 unsafe {
2065 let div: Simd = _mm512_div_ps(a, b).as_f32x16();
2066 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x16()))
2067 }
2068}
2069
2070/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2071///
2072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
2073#[inline]
2074#[target_feature(enable = "avx512f")]
2075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2076#[cfg_attr(test, assert_instr(vdivps))]
2077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2078pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2079 unsafe {
2080 let div: Simd = _mm512_div_ps(a, b).as_f32x16();
2081 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x16::ZERO))
2082 }
2083}
2084
2085/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2086///
2087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
2088#[inline]
2089#[target_feature(enable = "avx512f,avx512vl")]
2090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2091#[cfg_attr(test, assert_instr(vdivps))]
2092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2093pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2094 unsafe {
2095 let div: Simd = _mm256_div_ps(a, b).as_f32x8();
2096 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x8()))
2097 }
2098}
2099
2100/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2101///
2102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
2103#[inline]
2104#[target_feature(enable = "avx512f,avx512vl")]
2105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2106#[cfg_attr(test, assert_instr(vdivps))]
2107#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2108pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2109 unsafe {
2110 let div: Simd = _mm256_div_ps(a, b).as_f32x8();
2111 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x8::ZERO))
2112 }
2113}
2114
2115/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
2118#[inline]
2119#[target_feature(enable = "avx512f,avx512vl")]
2120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2121#[cfg_attr(test, assert_instr(vdivps))]
2122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2123pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2124 unsafe {
2125 let div: Simd = _mm_div_ps(a, b).as_f32x4();
2126 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f32x4()))
2127 }
2128}
2129
2130/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2131///
2132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
2133#[inline]
2134#[target_feature(enable = "avx512f,avx512vl")]
2135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2136#[cfg_attr(test, assert_instr(vdivps))]
2137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2138pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2139 unsafe {
2140 let div: Simd = _mm_div_ps(a, b).as_f32x4();
2141 transmute(src:simd_select_bitmask(m:k, yes:div, no:f32x4::ZERO))
2142 }
2143}
2144
2145/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2146///
2147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
2148#[inline]
2149#[target_feature(enable = "avx512f")]
2150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2151#[cfg_attr(test, assert_instr(vdivpd))]
2152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2153pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
2154 unsafe { transmute(src:simd_div(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
2155}
2156
2157/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2160#[inline]
2161#[target_feature(enable = "avx512f")]
2162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2163#[cfg_attr(test, assert_instr(vdivpd))]
2164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2165pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2166 unsafe {
2167 let div: Simd = _mm512_div_pd(a, b).as_f64x8();
2168 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x8()))
2169 }
2170}
2171
2172/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2173///
2174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2175#[inline]
2176#[target_feature(enable = "avx512f")]
2177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2178#[cfg_attr(test, assert_instr(vdivpd))]
2179#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2180pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2181 unsafe {
2182 let div: Simd = _mm512_div_pd(a, b).as_f64x8();
2183 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x8::ZERO))
2184 }
2185}
2186
2187/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2188///
2189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2190#[inline]
2191#[target_feature(enable = "avx512f,avx512vl")]
2192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2193#[cfg_attr(test, assert_instr(vdivpd))]
2194#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2195pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2196 unsafe {
2197 let div: Simd = _mm256_div_pd(a, b).as_f64x4();
2198 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x4()))
2199 }
2200}
2201
2202/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2203///
2204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2205#[inline]
2206#[target_feature(enable = "avx512f,avx512vl")]
2207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2208#[cfg_attr(test, assert_instr(vdivpd))]
2209#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2210pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2211 unsafe {
2212 let div: Simd = _mm256_div_pd(a, b).as_f64x4();
2213 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x4::ZERO))
2214 }
2215}
2216
2217/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2218///
2219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2220#[inline]
2221#[target_feature(enable = "avx512f,avx512vl")]
2222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2223#[cfg_attr(test, assert_instr(vdivpd))]
2224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2225pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2226 unsafe {
2227 let div: Simd = _mm_div_pd(a, b).as_f64x2();
2228 transmute(src:simd_select_bitmask(m:k, yes:div, no:src.as_f64x2()))
2229 }
2230}
2231
2232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2233///
2234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2235#[inline]
2236#[target_feature(enable = "avx512f,avx512vl")]
2237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2238#[cfg_attr(test, assert_instr(vdivpd))]
2239#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2240pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2241 unsafe {
2242 let div: Simd = _mm_div_pd(a, b).as_f64x2();
2243 transmute(src:simd_select_bitmask(m:k, yes:div, no:f64x2::ZERO))
2244 }
2245}
2246
2247/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2248///
2249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2250#[inline]
2251#[target_feature(enable = "avx512f")]
2252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2253#[cfg_attr(test, assert_instr(vpmaxsd))]
2254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2255pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2256 unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2257}
2258
2259/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2260///
2261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2262#[inline]
2263#[target_feature(enable = "avx512f")]
2264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2265#[cfg_attr(test, assert_instr(vpmaxsd))]
2266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2267pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2268 unsafe {
2269 let max: Simd = _mm512_max_epi32(a, b).as_i32x16();
2270 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x16()))
2271 }
2272}
2273
2274/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2275///
2276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2277#[inline]
2278#[target_feature(enable = "avx512f")]
2279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2280#[cfg_attr(test, assert_instr(vpmaxsd))]
2281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2282pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2283 unsafe {
2284 let max: Simd = _mm512_max_epi32(a, b).as_i32x16();
2285 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x16::ZERO))
2286 }
2287}
2288
2289/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2290///
2291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2292#[inline]
2293#[target_feature(enable = "avx512f,avx512vl")]
2294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2295#[cfg_attr(test, assert_instr(vpmaxsd))]
2296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2297pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2298 unsafe {
2299 let max: Simd = _mm256_max_epi32(a, b).as_i32x8();
2300 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x8()))
2301 }
2302}
2303
2304/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2305///
2306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2307#[inline]
2308#[target_feature(enable = "avx512f,avx512vl")]
2309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2310#[cfg_attr(test, assert_instr(vpmaxsd))]
2311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2312pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2313 unsafe {
2314 let max: Simd = _mm256_max_epi32(a, b).as_i32x8();
2315 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x8::ZERO))
2316 }
2317}
2318
2319/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2320///
2321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2322#[inline]
2323#[target_feature(enable = "avx512f,avx512vl")]
2324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2325#[cfg_attr(test, assert_instr(vpmaxsd))]
2326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2327pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2328 unsafe {
2329 let max: Simd = _mm_max_epi32(a, b).as_i32x4();
2330 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x4()))
2331 }
2332}
2333
2334/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2335///
2336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2337#[inline]
2338#[target_feature(enable = "avx512f,avx512vl")]
2339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2340#[cfg_attr(test, assert_instr(vpmaxsd))]
2341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2342pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2343 unsafe {
2344 let max: Simd = _mm_max_epi32(a, b).as_i32x4();
2345 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x4::ZERO))
2346 }
2347}
2348
2349/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2350///
2351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2352#[inline]
2353#[target_feature(enable = "avx512f")]
2354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2355#[cfg_attr(test, assert_instr(vpmaxsq))]
2356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2357pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2358 unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
2359}
2360
2361/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2362///
2363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2364#[inline]
2365#[target_feature(enable = "avx512f")]
2366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2367#[cfg_attr(test, assert_instr(vpmaxsq))]
2368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2369pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2370 unsafe {
2371 let max: Simd = _mm512_max_epi64(a, b).as_i64x8();
2372 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x8()))
2373 }
2374}
2375
2376/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2377///
2378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2379#[inline]
2380#[target_feature(enable = "avx512f")]
2381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2382#[cfg_attr(test, assert_instr(vpmaxsq))]
2383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2384pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2385 unsafe {
2386 let max: Simd = _mm512_max_epi64(a, b).as_i64x8();
2387 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2388 }
2389}
2390
2391/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2392///
2393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2394#[inline]
2395#[target_feature(enable = "avx512f,avx512vl")]
2396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2397#[cfg_attr(test, assert_instr(vpmaxsq))]
2398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2399pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2400 unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
2401}
2402
2403/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2404///
2405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2406#[inline]
2407#[target_feature(enable = "avx512f,avx512vl")]
2408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2409#[cfg_attr(test, assert_instr(vpmaxsq))]
2410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2411pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2412 unsafe {
2413 let max: Simd = _mm256_max_epi64(a, b).as_i64x4();
2414 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2415 }
2416}
2417
2418/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2419///
2420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2421#[inline]
2422#[target_feature(enable = "avx512f,avx512vl")]
2423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2424#[cfg_attr(test, assert_instr(vpmaxsq))]
2425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2426pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2427 unsafe {
2428 let max: Simd = _mm256_max_epi64(a, b).as_i64x4();
2429 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2430 }
2431}
2432
2433/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2434///
2435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2436#[inline]
2437#[target_feature(enable = "avx512f,avx512vl")]
2438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2439#[cfg_attr(test, assert_instr(vpmaxsq))]
2440#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2441pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2442 unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
2443}
2444
2445/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2446///
2447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2448#[inline]
2449#[target_feature(enable = "avx512f,avx512vl")]
2450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2451#[cfg_attr(test, assert_instr(vpmaxsq))]
2452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2453pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2454 unsafe {
2455 let max: Simd = _mm_max_epi64(a, b).as_i64x2();
2456 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2457 }
2458}
2459
2460/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2461///
2462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2463#[inline]
2464#[target_feature(enable = "avx512f,avx512vl")]
2465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2466#[cfg_attr(test, assert_instr(vpmaxsq))]
2467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2468pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2469 unsafe {
2470 let max: Simd = _mm_max_epi64(a, b).as_i64x2();
2471 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2472 }
2473}
2474
2475/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2476///
2477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2478#[inline]
2479#[target_feature(enable = "avx512f")]
2480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2481#[cfg_attr(test, assert_instr(vmaxps))]
2482pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2483 unsafe {
2484 transmute(src:vmaxps(
2485 a.as_f32x16(),
2486 b.as_f32x16(),
2487 _MM_FROUND_CUR_DIRECTION,
2488 ))
2489 }
2490}
2491
2492/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2493///
2494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2495#[inline]
2496#[target_feature(enable = "avx512f")]
2497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2498#[cfg_attr(test, assert_instr(vmaxps))]
2499pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2500 unsafe {
2501 let max: Simd = _mm512_max_ps(a, b).as_f32x16();
2502 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2503 }
2504}
2505
2506/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2507///
2508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2509#[inline]
2510#[target_feature(enable = "avx512f")]
2511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2512#[cfg_attr(test, assert_instr(vmaxps))]
2513pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2514 unsafe {
2515 let max: Simd = _mm512_max_ps(a, b).as_f32x16();
2516 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2517 }
2518}
2519
2520/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2521///
2522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2523#[inline]
2524#[target_feature(enable = "avx512f,avx512vl")]
2525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2526#[cfg_attr(test, assert_instr(vmaxps))]
2527pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2528 unsafe {
2529 let max: Simd = _mm256_max_ps(a, b).as_f32x8();
2530 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2531 }
2532}
2533
2534/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2535///
2536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2537#[inline]
2538#[target_feature(enable = "avx512f,avx512vl")]
2539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2540#[cfg_attr(test, assert_instr(vmaxps))]
2541pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2542 unsafe {
2543 let max: Simd = _mm256_max_ps(a, b).as_f32x8();
2544 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2545 }
2546}
2547
2548/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2549///
2550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2551#[inline]
2552#[target_feature(enable = "avx512f,avx512vl")]
2553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2554#[cfg_attr(test, assert_instr(vmaxps))]
2555pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2556 unsafe {
2557 let max: Simd = _mm_max_ps(a, b).as_f32x4();
2558 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2559 }
2560}
2561
2562/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2563///
2564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2565#[inline]
2566#[target_feature(enable = "avx512f,avx512vl")]
2567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2568#[cfg_attr(test, assert_instr(vmaxps))]
2569pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2570 unsafe {
2571 let max: Simd = _mm_max_ps(a, b).as_f32x4();
2572 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2573 }
2574}
2575
2576/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2577///
2578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2579#[inline]
2580#[target_feature(enable = "avx512f")]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582#[cfg_attr(test, assert_instr(vmaxpd))]
2583pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2584 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2585}
2586
2587/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2588///
2589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2590#[inline]
2591#[target_feature(enable = "avx512f")]
2592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2593#[cfg_attr(test, assert_instr(vmaxpd))]
2594pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2595 unsafe {
2596 let max: Simd = _mm512_max_pd(a, b).as_f64x8();
2597 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2598 }
2599}
2600
2601/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2604#[inline]
2605#[target_feature(enable = "avx512f")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vmaxpd))]
2608pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2609 unsafe {
2610 let max: Simd = _mm512_max_pd(a, b).as_f64x8();
2611 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2612 }
2613}
2614
2615/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2616///
2617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2618#[inline]
2619#[target_feature(enable = "avx512f,avx512vl")]
2620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2621#[cfg_attr(test, assert_instr(vmaxpd))]
2622pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2623 unsafe {
2624 let max: Simd = _mm256_max_pd(a, b).as_f64x4();
2625 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2626 }
2627}
2628
2629/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2630///
2631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2632#[inline]
2633#[target_feature(enable = "avx512f,avx512vl")]
2634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2635#[cfg_attr(test, assert_instr(vmaxpd))]
2636pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2637 unsafe {
2638 let max: Simd = _mm256_max_pd(a, b).as_f64x4();
2639 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2640 }
2641}
2642
2643/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2646#[inline]
2647#[target_feature(enable = "avx512f,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vmaxpd))]
2650pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2651 unsafe {
2652 let max: Simd = _mm_max_pd(a, b).as_f64x2();
2653 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2654 }
2655}
2656
2657/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2658///
2659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2660#[inline]
2661#[target_feature(enable = "avx512f,avx512vl")]
2662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2663#[cfg_attr(test, assert_instr(vmaxpd))]
2664pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2665 unsafe {
2666 let max: Simd = _mm_max_pd(a, b).as_f64x2();
2667 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2668 }
2669}
2670
2671/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2672///
2673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2674#[inline]
2675#[target_feature(enable = "avx512f")]
2676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2677#[cfg_attr(test, assert_instr(vpmaxud))]
2678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2679pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2680 unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
2681}
2682
2683/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2684///
2685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2686#[inline]
2687#[target_feature(enable = "avx512f")]
2688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2689#[cfg_attr(test, assert_instr(vpmaxud))]
2690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2691pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2692 unsafe {
2693 let max: Simd = _mm512_max_epu32(a, b).as_u32x16();
2694 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2695 }
2696}
2697
2698/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2699///
2700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2701#[inline]
2702#[target_feature(enable = "avx512f")]
2703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2704#[cfg_attr(test, assert_instr(vpmaxud))]
2705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2706pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2707 unsafe {
2708 let max: Simd = _mm512_max_epu32(a, b).as_u32x16();
2709 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2710 }
2711}
2712
2713/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2714///
2715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2716#[inline]
2717#[target_feature(enable = "avx512f,avx512vl")]
2718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2719#[cfg_attr(test, assert_instr(vpmaxud))]
2720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2721pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2722 unsafe {
2723 let max: Simd = _mm256_max_epu32(a, b).as_u32x8();
2724 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2725 }
2726}
2727
2728/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2729///
2730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2731#[inline]
2732#[target_feature(enable = "avx512f,avx512vl")]
2733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2734#[cfg_attr(test, assert_instr(vpmaxud))]
2735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2736pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2737 unsafe {
2738 let max: Simd = _mm256_max_epu32(a, b).as_u32x8();
2739 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2740 }
2741}
2742
2743/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2744///
2745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2746#[inline]
2747#[target_feature(enable = "avx512f,avx512vl")]
2748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2749#[cfg_attr(test, assert_instr(vpmaxud))]
2750#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2751pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2752 unsafe {
2753 let max: Simd = _mm_max_epu32(a, b).as_u32x4();
2754 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2755 }
2756}
2757
2758/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2759///
2760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2761#[inline]
2762#[target_feature(enable = "avx512f,avx512vl")]
2763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2764#[cfg_attr(test, assert_instr(vpmaxud))]
2765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2766pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2767 unsafe {
2768 let max: Simd = _mm_max_epu32(a, b).as_u32x4();
2769 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2770 }
2771}
2772
2773/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2774///
2775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2776#[inline]
2777#[target_feature(enable = "avx512f")]
2778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2779#[cfg_attr(test, assert_instr(vpmaxuq))]
2780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2781pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2782 unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
2783}
2784
2785/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2786///
2787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2788#[inline]
2789#[target_feature(enable = "avx512f")]
2790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2791#[cfg_attr(test, assert_instr(vpmaxuq))]
2792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2793pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2794 unsafe {
2795 let max: Simd = _mm512_max_epu64(a, b).as_u64x8();
2796 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2797 }
2798}
2799
2800/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2803#[inline]
2804#[target_feature(enable = "avx512f")]
2805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2806#[cfg_attr(test, assert_instr(vpmaxuq))]
2807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2808pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2809 unsafe {
2810 let max: Simd = _mm512_max_epu64(a, b).as_u64x8();
2811 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2812 }
2813}
2814
2815/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2816///
2817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2818#[inline]
2819#[target_feature(enable = "avx512f,avx512vl")]
2820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2821#[cfg_attr(test, assert_instr(vpmaxuq))]
2822#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2823pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2824 unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
2825}
2826
2827/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2828///
2829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2830#[inline]
2831#[target_feature(enable = "avx512f,avx512vl")]
2832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2833#[cfg_attr(test, assert_instr(vpmaxuq))]
2834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2835pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2836 unsafe {
2837 let max: Simd = _mm256_max_epu64(a, b).as_u64x4();
2838 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2839 }
2840}
2841
2842/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2843///
2844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2845#[inline]
2846#[target_feature(enable = "avx512f,avx512vl")]
2847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2848#[cfg_attr(test, assert_instr(vpmaxuq))]
2849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2850pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2851 unsafe {
2852 let max: Simd = _mm256_max_epu64(a, b).as_u64x4();
2853 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2854 }
2855}
2856
2857/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2858///
2859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2860#[inline]
2861#[target_feature(enable = "avx512f,avx512vl")]
2862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2863#[cfg_attr(test, assert_instr(vpmaxuq))]
2864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2865pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2866 unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
2867}
2868
2869/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2870///
2871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2872#[inline]
2873#[target_feature(enable = "avx512f,avx512vl")]
2874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2875#[cfg_attr(test, assert_instr(vpmaxuq))]
2876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2877pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2878 unsafe {
2879 let max: Simd = _mm_max_epu64(a, b).as_u64x2();
2880 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2881 }
2882}
2883
2884/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2885///
2886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2887#[inline]
2888#[target_feature(enable = "avx512f,avx512vl")]
2889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2890#[cfg_attr(test, assert_instr(vpmaxuq))]
2891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2892pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2893 unsafe {
2894 let max: Simd = _mm_max_epu64(a, b).as_u64x2();
2895 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2896 }
2897}
2898
2899/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2900///
2901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2902#[inline]
2903#[target_feature(enable = "avx512f")]
2904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2905#[cfg_attr(test, assert_instr(vpminsd))]
2906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2907pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2908 unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
2909}
2910
2911/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2912///
2913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2914#[inline]
2915#[target_feature(enable = "avx512f")]
2916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2917#[cfg_attr(test, assert_instr(vpminsd))]
2918#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2919pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2920 unsafe {
2921 let min: Simd = _mm512_min_epi32(a, b).as_i32x16();
2922 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2923 }
2924}
2925
2926/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2927///
2928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2929#[inline]
2930#[target_feature(enable = "avx512f")]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932#[cfg_attr(test, assert_instr(vpminsd))]
2933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2934pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2935 unsafe {
2936 let min: Simd = _mm512_min_epi32(a, b).as_i32x16();
2937 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2938 }
2939}
2940
2941/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2942///
2943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2944#[inline]
2945#[target_feature(enable = "avx512f,avx512vl")]
2946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2947#[cfg_attr(test, assert_instr(vpminsd))]
2948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2949pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2950 unsafe {
2951 let min: Simd = _mm256_min_epi32(a, b).as_i32x8();
2952 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2953 }
2954}
2955
2956/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2957///
2958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2959#[inline]
2960#[target_feature(enable = "avx512f,avx512vl")]
2961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2962#[cfg_attr(test, assert_instr(vpminsd))]
2963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2964pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2965 unsafe {
2966 let min: Simd = _mm256_min_epi32(a, b).as_i32x8();
2967 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2968 }
2969}
2970
2971/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2972///
2973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2974#[inline]
2975#[target_feature(enable = "avx512f,avx512vl")]
2976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2977#[cfg_attr(test, assert_instr(vpminsd))]
2978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2979pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2980 unsafe {
2981 let min: Simd = _mm_min_epi32(a, b).as_i32x4();
2982 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2983 }
2984}
2985
2986/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2987///
2988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2989#[inline]
2990#[target_feature(enable = "avx512f,avx512vl")]
2991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2992#[cfg_attr(test, assert_instr(vpminsd))]
2993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2994pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2995 unsafe {
2996 let min: Simd = _mm_min_epi32(a, b).as_i32x4();
2997 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2998 }
2999}
3000
3001/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3002///
3003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
3004#[inline]
3005#[target_feature(enable = "avx512f")]
3006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3007#[cfg_attr(test, assert_instr(vpminsq))]
3008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3009pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
3010 unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
3011}
3012
3013/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3014///
3015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
3016#[inline]
3017#[target_feature(enable = "avx512f")]
3018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3019#[cfg_attr(test, assert_instr(vpminsq))]
3020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3021pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3022 unsafe {
3023 let min: Simd = _mm512_min_epi64(a, b).as_i64x8();
3024 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
3025 }
3026}
3027
3028/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3029///
3030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
3031#[inline]
3032#[target_feature(enable = "avx512f")]
3033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3034#[cfg_attr(test, assert_instr(vpminsq))]
3035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3036pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3037 unsafe {
3038 let min: Simd = _mm512_min_epi64(a, b).as_i64x8();
3039 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
3040 }
3041}
3042
3043/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3044///
3045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
3046#[inline]
3047#[target_feature(enable = "avx512f,avx512vl")]
3048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3049#[cfg_attr(test, assert_instr(vpminsq))]
3050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3051pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
3052 unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
3053}
3054
3055/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3056///
3057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
3058#[inline]
3059#[target_feature(enable = "avx512f,avx512vl")]
3060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3061#[cfg_attr(test, assert_instr(vpminsq))]
3062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3063pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3064 unsafe {
3065 let min: Simd = _mm256_min_epi64(a, b).as_i64x4();
3066 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
3067 }
3068}
3069
3070/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3071///
3072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
3073#[inline]
3074#[target_feature(enable = "avx512f,avx512vl")]
3075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3076#[cfg_attr(test, assert_instr(vpminsq))]
3077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3078pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3079 unsafe {
3080 let min: Simd = _mm256_min_epi64(a, b).as_i64x4();
3081 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
3082 }
3083}
3084
3085/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3086///
3087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
3088#[inline]
3089#[target_feature(enable = "avx512f,avx512vl")]
3090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3091#[cfg_attr(test, assert_instr(vpminsq))]
3092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3093pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
3094 unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
3095}
3096
3097/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3098///
3099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
3100#[inline]
3101#[target_feature(enable = "avx512f,avx512vl")]
3102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3103#[cfg_attr(test, assert_instr(vpminsq))]
3104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3105pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3106 unsafe {
3107 let min: Simd = _mm_min_epi64(a, b).as_i64x2();
3108 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
3109 }
3110}
3111
3112/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3113///
3114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
3115#[inline]
3116#[target_feature(enable = "avx512f,avx512vl")]
3117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3118#[cfg_attr(test, assert_instr(vpminsq))]
3119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3120pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3121 unsafe {
3122 let min: Simd = _mm_min_epi64(a, b).as_i64x2();
3123 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
3124 }
3125}
3126
3127/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
3128///
3129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
3130#[inline]
3131#[target_feature(enable = "avx512f")]
3132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3133#[cfg_attr(test, assert_instr(vminps))]
3134pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
3135 unsafe {
3136 transmute(src:vminps(
3137 a.as_f32x16(),
3138 b.as_f32x16(),
3139 _MM_FROUND_CUR_DIRECTION,
3140 ))
3141 }
3142}
3143
3144/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3145///
3146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
3147#[inline]
3148#[target_feature(enable = "avx512f")]
3149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3150#[cfg_attr(test, assert_instr(vminps))]
3151pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
3152 unsafe {
3153 let min: Simd = _mm512_min_ps(a, b).as_f32x16();
3154 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
3155 }
3156}
3157
3158/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3159///
3160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3161#[inline]
3162#[target_feature(enable = "avx512f")]
3163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3164#[cfg_attr(test, assert_instr(vminps))]
3165pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3166 unsafe {
3167 let min: Simd = _mm512_min_ps(a, b).as_f32x16();
3168 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3169 }
3170}
3171
3172/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3173///
3174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3175#[inline]
3176#[target_feature(enable = "avx512f,avx512vl")]
3177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3178#[cfg_attr(test, assert_instr(vminps))]
3179pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3180 unsafe {
3181 let min: Simd = _mm256_min_ps(a, b).as_f32x8();
3182 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3183 }
3184}
3185
3186/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3187///
3188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3189#[inline]
3190#[target_feature(enable = "avx512f,avx512vl")]
3191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3192#[cfg_attr(test, assert_instr(vminps))]
3193pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3194 unsafe {
3195 let min: Simd = _mm256_min_ps(a, b).as_f32x8();
3196 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3197 }
3198}
3199
3200/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3201///
3202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3203#[inline]
3204#[target_feature(enable = "avx512f,avx512vl")]
3205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3206#[cfg_attr(test, assert_instr(vminps))]
3207pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3208 unsafe {
3209 let min: Simd = _mm_min_ps(a, b).as_f32x4();
3210 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3211 }
3212}
3213
3214/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3215///
3216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3217#[inline]
3218#[target_feature(enable = "avx512f,avx512vl")]
3219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3220#[cfg_attr(test, assert_instr(vminps))]
3221pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3222 unsafe {
3223 let min: Simd = _mm_min_ps(a, b).as_f32x4();
3224 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3225 }
3226}
3227
3228/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3229///
3230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3231#[inline]
3232#[target_feature(enable = "avx512f")]
3233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3234#[cfg_attr(test, assert_instr(vminpd))]
3235pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3236 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3237}
3238
3239/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3240///
3241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3242#[inline]
3243#[target_feature(enable = "avx512f")]
3244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3245#[cfg_attr(test, assert_instr(vminpd))]
3246pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3247 unsafe {
3248 let min: Simd = _mm512_min_pd(a, b).as_f64x8();
3249 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3250 }
3251}
3252
3253/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3254///
3255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3256#[inline]
3257#[target_feature(enable = "avx512f")]
3258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3259#[cfg_attr(test, assert_instr(vminpd))]
3260pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3261 unsafe {
3262 let min: Simd = _mm512_min_pd(a, b).as_f64x8();
3263 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3264 }
3265}
3266
3267/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3270#[inline]
3271#[target_feature(enable = "avx512f,avx512vl")]
3272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3273#[cfg_attr(test, assert_instr(vminpd))]
3274pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3275 unsafe {
3276 let min: Simd = _mm256_min_pd(a, b).as_f64x4();
3277 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3278 }
3279}
3280
3281/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3282///
3283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3284#[inline]
3285#[target_feature(enable = "avx512f,avx512vl")]
3286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3287#[cfg_attr(test, assert_instr(vminpd))]
3288pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3289 unsafe {
3290 let min: Simd = _mm256_min_pd(a, b).as_f64x4();
3291 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3292 }
3293}
3294
3295/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3296///
3297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3298#[inline]
3299#[target_feature(enable = "avx512f,avx512vl")]
3300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3301#[cfg_attr(test, assert_instr(vminpd))]
3302pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3303 unsafe {
3304 let min: Simd = _mm_min_pd(a, b).as_f64x2();
3305 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3306 }
3307}
3308
3309/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3310///
3311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3312#[inline]
3313#[target_feature(enable = "avx512f,avx512vl")]
3314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3315#[cfg_attr(test, assert_instr(vminpd))]
3316pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3317 unsafe {
3318 let min: Simd = _mm_min_pd(a, b).as_f64x2();
3319 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3320 }
3321}
3322
3323/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3324///
3325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3326#[inline]
3327#[target_feature(enable = "avx512f")]
3328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3329#[cfg_attr(test, assert_instr(vpminud))]
3330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3331pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3332 unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
3333}
3334
3335/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3336///
3337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3338#[inline]
3339#[target_feature(enable = "avx512f")]
3340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3341#[cfg_attr(test, assert_instr(vpminud))]
3342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3343pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3344 unsafe {
3345 let min: Simd = _mm512_min_epu32(a, b).as_u32x16();
3346 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3347 }
3348}
3349
3350/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3351///
3352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3353#[inline]
3354#[target_feature(enable = "avx512f")]
3355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3356#[cfg_attr(test, assert_instr(vpminud))]
3357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3358pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3359 unsafe {
3360 let min: Simd = _mm512_min_epu32(a, b).as_u32x16();
3361 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3362 }
3363}
3364
3365/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3366///
3367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3368#[inline]
3369#[target_feature(enable = "avx512f,avx512vl")]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3371#[cfg_attr(test, assert_instr(vpminud))]
3372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3373pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3374 unsafe {
3375 let min: Simd = _mm256_min_epu32(a, b).as_u32x8();
3376 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x8()))
3377 }
3378}
3379
3380/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3381///
3382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3383#[inline]
3384#[target_feature(enable = "avx512f,avx512vl")]
3385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3386#[cfg_attr(test, assert_instr(vpminud))]
3387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3388pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3389 unsafe {
3390 let min: Simd = _mm256_min_epu32(a, b).as_u32x8();
3391 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x8::ZERO))
3392 }
3393}
3394
3395/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3396///
3397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3398#[inline]
3399#[target_feature(enable = "avx512f,avx512vl")]
3400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3401#[cfg_attr(test, assert_instr(vpminud))]
3402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3403pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3404 unsafe {
3405 let min: Simd = _mm_min_epu32(a, b).as_u32x4();
3406 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x4()))
3407 }
3408}
3409
3410/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3413#[inline]
3414#[target_feature(enable = "avx512f,avx512vl")]
3415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3416#[cfg_attr(test, assert_instr(vpminud))]
3417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3418pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3419 unsafe {
3420 let min: Simd = _mm_min_epu32(a, b).as_u32x4();
3421 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x4::ZERO))
3422 }
3423}
3424
3425/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3426///
3427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3428#[inline]
3429#[target_feature(enable = "avx512f")]
3430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3431#[cfg_attr(test, assert_instr(vpminuq))]
3432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3433pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3434 unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
3435}
3436
3437/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3438///
3439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3440#[inline]
3441#[target_feature(enable = "avx512f")]
3442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3443#[cfg_attr(test, assert_instr(vpminuq))]
3444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3445pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3446 unsafe {
3447 let min: Simd = _mm512_min_epu64(a, b).as_u64x8();
3448 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x8()))
3449 }
3450}
3451
3452/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3453///
3454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3455#[inline]
3456#[target_feature(enable = "avx512f")]
3457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3458#[cfg_attr(test, assert_instr(vpminuq))]
3459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3460pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3461 unsafe {
3462 let min: Simd = _mm512_min_epu64(a, b).as_u64x8();
3463 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x8::ZERO))
3464 }
3465}
3466
3467/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3470#[inline]
3471#[target_feature(enable = "avx512f,avx512vl")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vpminuq))]
3474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3475pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3476 unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
3477}
3478
3479/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3480///
3481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3482#[inline]
3483#[target_feature(enable = "avx512f,avx512vl")]
3484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3485#[cfg_attr(test, assert_instr(vpminuq))]
3486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3487pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3488 unsafe {
3489 let min: Simd = _mm256_min_epu64(a, b).as_u64x4();
3490 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x4()))
3491 }
3492}
3493
3494/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3495///
3496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3497#[inline]
3498#[target_feature(enable = "avx512f,avx512vl")]
3499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3500#[cfg_attr(test, assert_instr(vpminuq))]
3501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3502pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3503 unsafe {
3504 let min: Simd = _mm256_min_epu64(a, b).as_u64x4();
3505 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x4::ZERO))
3506 }
3507}
3508
3509/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3510///
3511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3512#[inline]
3513#[target_feature(enable = "avx512f,avx512vl")]
3514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3515#[cfg_attr(test, assert_instr(vpminuq))]
3516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3517pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3518 unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
3519}
3520
3521/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3522///
3523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3524#[inline]
3525#[target_feature(enable = "avx512f,avx512vl")]
3526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3527#[cfg_attr(test, assert_instr(vpminuq))]
3528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3529pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3530 unsafe {
3531 let min: Simd = _mm_min_epu64(a, b).as_u64x2();
3532 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u64x2()))
3533 }
3534}
3535
3536/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3537///
3538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3539#[inline]
3540#[target_feature(enable = "avx512f,avx512vl")]
3541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3542#[cfg_attr(test, assert_instr(vpminuq))]
3543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3544pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3545 unsafe {
3546 let min: Simd = _mm_min_epu64(a, b).as_u64x2();
3547 transmute(src:simd_select_bitmask(m:k, yes:min, no:u64x2::ZERO))
3548 }
3549}
3550
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    // Element-wise square root via the generic SIMD intrinsic.
    unsafe { simd_fsqrt(a) }
}
3561
3562/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3563///
3564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3565#[inline]
3566#[target_feature(enable = "avx512f")]
3567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3568#[cfg_attr(test, assert_instr(vsqrtps))]
3569pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3570 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3571}
3572
3573/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3574///
3575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3576#[inline]
3577#[target_feature(enable = "avx512f")]
3578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3579#[cfg_attr(test, assert_instr(vsqrtps))]
3580pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3581 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm512_setzero_ps()) }
3582}
3583
3584/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3585///
3586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3587#[inline]
3588#[target_feature(enable = "avx512f,avx512vl")]
3589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3590#[cfg_attr(test, assert_instr(vsqrtps))]
3591pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3592 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3593}
3594
3595/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3596///
3597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3598#[inline]
3599#[target_feature(enable = "avx512f,avx512vl")]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3601#[cfg_attr(test, assert_instr(vsqrtps))]
3602pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3603 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm256_setzero_ps()) }
3604}
3605
3606/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3607///
3608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3609#[inline]
3610#[target_feature(enable = "avx512f,avx512vl")]
3611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3612#[cfg_attr(test, assert_instr(vsqrtps))]
3613pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3614 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3615}
3616
3617/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3618///
3619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3620#[inline]
3621#[target_feature(enable = "avx512f,avx512vl")]
3622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3623#[cfg_attr(test, assert_instr(vsqrtps))]
3624pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3625 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm_setzero_ps()) }
3626}
3627
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    // Element-wise square root via the generic SIMD intrinsic.
    unsafe { simd_fsqrt(a) }
}
3638
3639/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3640///
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3642#[inline]
3643#[target_feature(enable = "avx512f")]
3644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3645#[cfg_attr(test, assert_instr(vsqrtpd))]
3646pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3647 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3648}
3649
3650/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3651///
3652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3653#[inline]
3654#[target_feature(enable = "avx512f")]
3655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3656#[cfg_attr(test, assert_instr(vsqrtpd))]
3657pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3658 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm512_setzero_pd()) }
3659}
3660
3661/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3662///
3663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3664#[inline]
3665#[target_feature(enable = "avx512f,avx512vl")]
3666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3667#[cfg_attr(test, assert_instr(vsqrtpd))]
3668pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3669 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3670}
3671
3672/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3673///
3674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3675#[inline]
3676#[target_feature(enable = "avx512f,avx512vl")]
3677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3678#[cfg_attr(test, assert_instr(vsqrtpd))]
3679pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3680 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm256_setzero_pd()) }
3681}
3682
3683/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3684///
3685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3686#[inline]
3687#[target_feature(enable = "avx512f,avx512vl")]
3688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3689#[cfg_attr(test, assert_instr(vsqrtpd))]
3690pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3691 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:src) }
3692}
3693
3694/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3695///
3696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3697#[inline]
3698#[target_feature(enable = "avx512f,avx512vl")]
3699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3700#[cfg_attr(test, assert_instr(vsqrtpd))]
3701pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3702 unsafe { simd_select_bitmask(m:k, yes:simd_fsqrt(a), no:_mm_setzero_pd()) }
3703}
3704
3705/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3706///
3707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3708#[inline]
3709#[target_feature(enable = "avx512f")]
3710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3711#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3712#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3713pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3714 unsafe { simd_fma(x:a, y:b, z:c) }
3715}
3716
3717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3718///
3719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3720#[inline]
3721#[target_feature(enable = "avx512f")]
3722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3723#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3725pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3726 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:a) }
3727}
3728
3729/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3730///
3731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3732#[inline]
3733#[target_feature(enable = "avx512f")]
3734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3735#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3737pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3738 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:_mm512_setzero_ps()) }
3739}
3740
3741/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3742///
3743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3744#[inline]
3745#[target_feature(enable = "avx512f")]
3746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3747#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3749pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3750 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_ps(a, b, c), no:c) }
3751}
3752
3753/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3761pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3762 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:a) }
3763}
3764
3765/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3766///
3767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3768#[inline]
3769#[target_feature(enable = "avx512f,avx512vl")]
3770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3771#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3773pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3774 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:_mm256_setzero_ps()) }
3775}
3776
3777/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3778///
3779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3780#[inline]
3781#[target_feature(enable = "avx512f,avx512vl")]
3782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3783#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3785pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3786 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_ps(a, b, c), no:c) }
3787}
3788
3789/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3790///
3791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3792#[inline]
3793#[target_feature(enable = "avx512f,avx512vl")]
3794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3795#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3797pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3798 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:a) }
3799}
3800
3801/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3802///
3803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3804#[inline]
3805#[target_feature(enable = "avx512f,avx512vl")]
3806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3807#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3809pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3810 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:_mm_setzero_ps()) }
3811}
3812
3813/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3814///
3815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3816#[inline]
3817#[target_feature(enable = "avx512f,avx512vl")]
3818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3819#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3821pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3822 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_ps(a, b, c), no:c) }
3823}
3824
3825/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3826///
3827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3828#[inline]
3829#[target_feature(enable = "avx512f")]
3830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3831#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3833pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3834 unsafe { simd_fma(x:a, y:b, z:c) }
3835}
3836
3837/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3838///
3839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3840#[inline]
3841#[target_feature(enable = "avx512f")]
3842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3843#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3845pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3846 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:a) }
3847}
3848
3849/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3850///
3851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3852#[inline]
3853#[target_feature(enable = "avx512f")]
3854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3855#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3856#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3857pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3858 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:_mm512_setzero_pd()) }
3859}
3860
3861/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3862///
3863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3864#[inline]
3865#[target_feature(enable = "avx512f")]
3866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3867#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3869pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3870 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmadd_pd(a, b, c), no:c) }
3871}
3872
3873/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3874///
3875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3876#[inline]
3877#[target_feature(enable = "avx512f,avx512vl")]
3878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3879#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3881pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3882 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:a) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3888#[inline]
3889#[target_feature(enable = "avx512f,avx512vl")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3893pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3894 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:_mm256_setzero_pd()) }
3895}
3896
3897/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3898///
3899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3900#[inline]
3901#[target_feature(enable = "avx512f,avx512vl")]
3902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3903#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3905pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3906 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmadd_pd(a, b, c), no:c) }
3907}
3908
3909/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3910///
3911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3912#[inline]
3913#[target_feature(enable = "avx512f,avx512vl")]
3914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3915#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3917pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3918 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:a) }
3919}
3920
3921/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3922///
3923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3924#[inline]
3925#[target_feature(enable = "avx512f,avx512vl")]
3926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3927#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3929pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3930 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:_mm_setzero_pd()) }
3931}
3932
3933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3934///
3935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3936#[inline]
3937#[target_feature(enable = "avx512f,avx512vl")]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3939#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3941pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3942 unsafe { simd_select_bitmask(m:k, yes:_mm_fmadd_pd(a, b, c), no:c) }
3943}
3944
3945/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3946///
3947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3948#[inline]
3949#[target_feature(enable = "avx512f")]
3950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3951#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3953pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3954 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
3955}
3956
3957/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3958///
3959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3960#[inline]
3961#[target_feature(enable = "avx512f")]
3962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3963#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3965pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3966 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:a) }
3967}
3968
3969/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3970///
3971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3972#[inline]
3973#[target_feature(enable = "avx512f")]
3974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3975#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3977pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3978 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
3979}
3980
3981/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3982///
3983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3984#[inline]
3985#[target_feature(enable = "avx512f")]
3986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3987#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3989pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3990 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_ps(a, b, c), no:c) }
3991}
3992
3993/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3994///
3995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3996#[inline]
3997#[target_feature(enable = "avx512f,avx512vl")]
3998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3999#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4001pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4002 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:a) }
4003}
4004
4005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4006///
4007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
4008#[inline]
4009#[target_feature(enable = "avx512f,avx512vl")]
4010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4011#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4013pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4014 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4015}
4016
4017/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4018///
4019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
4020#[inline]
4021#[target_feature(enable = "avx512f,avx512vl")]
4022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4023#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4025pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4026 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_ps(a, b, c), no:c) }
4027}
4028
4029/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4030///
4031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
4032#[inline]
4033#[target_feature(enable = "avx512f,avx512vl")]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4037pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4038 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:a) }
4039}
4040
4041/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4042///
4043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
4044#[inline]
4045#[target_feature(enable = "avx512f,avx512vl")]
4046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4047#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4049pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4050 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:_mm_setzero_ps()) }
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4059#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4061pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4062 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_ps(a, b, c), no:c) }
4063}
4064
4065/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
4066///
4067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
4068#[inline]
4069#[target_feature(enable = "avx512f")]
4070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4071#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4073pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4074 unsafe { simd_fma(x:a, y:b, z:simd_neg(c)) }
4075}
4076
4077/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4078///
4079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4083#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4085pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4086 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:a) }
4087}
4088
4089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4090///
4091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
4092#[inline]
4093#[target_feature(enable = "avx512f")]
4094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4095#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4097pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4098 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4099}
4100
4101/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4102///
4103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
4104#[inline]
4105#[target_feature(enable = "avx512f")]
4106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4107#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4109pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4110 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsub_pd(a, b, c), no:c) }
4111}
4112
4113/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4114///
4115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
4116#[inline]
4117#[target_feature(enable = "avx512f,avx512vl")]
4118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4119#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4121pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4122 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:a) }
4123}
4124
4125/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4126///
4127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
4128#[inline]
4129#[target_feature(enable = "avx512f,avx512vl")]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4133pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4134 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:_mm256_setzero_pd()) }
4135}
4136
4137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4138///
4139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
4140#[inline]
4141#[target_feature(enable = "avx512f,avx512vl")]
4142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4143#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4145pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4146 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsub_pd(a, b, c), no:c) }
4147}
4148
4149/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4150///
4151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
4152#[inline]
4153#[target_feature(enable = "avx512f,avx512vl")]
4154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4155#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4157pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4158 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4169pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4170 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:_mm_setzero_pd()) }
4171}
4172
4173/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
4176#[inline]
4177#[target_feature(enable = "avx512f,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
4180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4181pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4182 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsub_pd(a, b, c), no:c) }
4183}
4184
4185/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
4188#[inline]
4189#[target_feature(enable = "avx512f")]
4190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4191#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4193pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4194 unsafe {
4195 let add: __m512 = simd_fma(x:a, y:b, z:c);
4196 let sub: __m512 = simd_fma(x:a, y:b, z:simd_neg(c));
4197 simd_shuffle!(
4198 add,
4199 sub,
4200 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
4201 )
4202 }
4203}
4204
4205/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4208#[inline]
4209#[target_feature(enable = "avx512f")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4213pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4214 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:a) }
4215}
4216
4217/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4218///
4219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4220#[inline]
4221#[target_feature(enable = "avx512f")]
4222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4223#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4225pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4226 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4235#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4237pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4238 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_ps(a, b, c), no:c) }
4239}
4240
4241/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4242///
4243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4244#[inline]
4245#[target_feature(enable = "avx512f,avx512vl")]
4246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4247#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4249pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4250 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:a) }
4251}
4252
4253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4254///
4255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4256#[inline]
4257#[target_feature(enable = "avx512f,avx512vl")]
4258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4259#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4261pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4262 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4263}
4264
4265/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4266///
4267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4268#[inline]
4269#[target_feature(enable = "avx512f,avx512vl")]
4270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4271#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4273pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4274 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:c) }
4275}
4276
4277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4278///
4279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4280#[inline]
4281#[target_feature(enable = "avx512f,avx512vl")]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4285pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4286 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:a) }
4287}
4288
4289/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4290///
4291/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_fmaddsub_ps&expand=2606)
4292#[inline]
4293#[target_feature(enable = "avx512f,avx512vl")]
4294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4295#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4297pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4298 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:_mm_setzero_ps()) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4308#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4309pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4310 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:c) }
4311}
4312
4313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4314///
4315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4316#[inline]
4317#[target_feature(enable = "avx512f")]
4318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4319#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4321pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4322 unsafe {
4323 let add: __m512d = simd_fma(x:a, y:b, z:c);
4324 let sub: __m512d = simd_fma(x:a, y:b, z:simd_neg(c));
4325 simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4326 }
4327}
4328
4329/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4330///
4331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4332#[inline]
4333#[target_feature(enable = "avx512f")]
4334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4335#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4337pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4338 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:a) }
4339}
4340
4341/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4342///
4343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4344#[inline]
4345#[target_feature(enable = "avx512f")]
4346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4347#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4349pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4350 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4351}
4352
4353/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4354///
4355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4356#[inline]
4357#[target_feature(enable = "avx512f")]
4358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4359#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4361pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4362 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmaddsub_pd(a, b, c), no:c) }
4363}
4364
4365/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4366///
4367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4368#[inline]
4369#[target_feature(enable = "avx512f,avx512vl")]
4370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4371#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4373pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4374 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:a) }
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4378///
4379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4380#[inline]
4381#[target_feature(enable = "avx512f,avx512vl")]
4382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4383#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4385pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4386 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:_mm256_setzero_pd()) }
4387}
4388
4389/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4390///
4391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4392#[inline]
4393#[target_feature(enable = "avx512f,avx512vl")]
4394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4395#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4397pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4398 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_pd(a, b, c), no:c) }
4399}
4400
4401/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4402///
4403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4404#[inline]
4405#[target_feature(enable = "avx512f,avx512vl")]
4406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4407#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4409pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4410 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:a) }
4411}
4412
4413/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4414///
4415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4416#[inline]
4417#[target_feature(enable = "avx512f,avx512vl")]
4418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4419#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4421pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4422 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:_mm_setzero_pd()) }
4423}
4424
4425/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4426///
4427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4428#[inline]
4429#[target_feature(enable = "avx512f,avx512vl")]
4430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4431#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4433pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4434 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_pd(a, b, c), no:c) }
4435}
4436
4437/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4440#[inline]
4441#[target_feature(enable = "avx512f")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4445pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4446 unsafe {
4447 let add: __m512 = simd_fma(x:a, y:b, z:c);
4448 let sub: __m512 = simd_fma(x:a, y:b, z:simd_neg(c));
4449 simd_shuffle!(
4450 add,
4451 sub,
4452 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4453 )
4454 }
4455}
4456
4457/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4458///
4459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4460#[inline]
4461#[target_feature(enable = "avx512f")]
4462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4463#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4464#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4465pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4466 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:a) }
4467}
4468
4469/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4470///
4471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4472#[inline]
4473#[target_feature(enable = "avx512f")]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4477pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:_mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4489pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4490 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_ps(a, b, c), no:c) }
4491}
4492
4493/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4494///
4495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4496#[inline]
4497#[target_feature(enable = "avx512f,avx512vl")]
4498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4499#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4502 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:a) }
4503}
4504
4505/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4506///
4507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4508#[inline]
4509#[target_feature(enable = "avx512f,avx512vl")]
4510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4511#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4513pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4514 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:_mm256_setzero_ps()) }
4515}
4516
4517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4518///
4519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4520#[inline]
4521#[target_feature(enable = "avx512f,avx512vl")]
4522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4523#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4525pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4526 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_ps(a, b, c), no:c) }
4527}
4528
4529/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4530///
4531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4532#[inline]
4533#[target_feature(enable = "avx512f,avx512vl")]
4534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4535#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4538 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:a) }
4539}
4540
4541/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4542///
4543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4544#[inline]
4545#[target_feature(enable = "avx512f,avx512vl")]
4546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4547#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4549pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4550 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:_mm_setzero_ps()) }
4551}
4552
4553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4556#[inline]
4557#[target_feature(enable = "avx512f,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4561pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4562 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_ps(a, b, c), no:c) }
4563}
4564
4565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4566///
4567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4568#[inline]
4569#[target_feature(enable = "avx512f")]
4570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4571#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4573pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4574 unsafe {
4575 let add: __m512d = simd_fma(x:a, y:b, z:c);
4576 let sub: __m512d = simd_fma(x:a, y:b, z:simd_neg(c));
4577 simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4578 }
4579}
4580
4581/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4584#[inline]
4585#[target_feature(enable = "avx512f")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4589pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4590 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:a) }
4591}
4592
4593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4594///
4595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4596#[inline]
4597#[target_feature(enable = "avx512f")]
4598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4599#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4601pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4602 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:_mm512_setzero_pd()) }
4603}
4604
4605/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4606///
4607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4608#[inline]
4609#[target_feature(enable = "avx512f")]
4610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4611#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4612#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4613pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4614 unsafe { simd_select_bitmask(m:k, yes:_mm512_fmsubadd_pd(a, b, c), no:c) }
4615}
4616
4617/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4618///
4619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4620#[inline]
4621#[target_feature(enable = "avx512f,avx512vl")]
4622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4623#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4625pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4626 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:a) }
4627}
4628
4629/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4630///
4631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4632#[inline]
4633#[target_feature(enable = "avx512f,avx512vl")]
4634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4635#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4637pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4638 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:_mm256_setzero_pd()) }
4639}
4640
4641/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4642///
4643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4644#[inline]
4645#[target_feature(enable = "avx512f,avx512vl")]
4646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4647#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4649pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4650 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmsubadd_pd(a, b, c), no:c) }
4651}
4652
4653/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4654///
4655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4656#[inline]
4657#[target_feature(enable = "avx512f,avx512vl")]
4658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4659#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4661pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4662 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:a) }
4663}
4664
4665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4666///
4667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4668#[inline]
4669#[target_feature(enable = "avx512f,avx512vl")]
4670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4671#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4673pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4674 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:_mm_setzero_pd()) }
4675}
4676
4677/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4678///
4679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4680#[inline]
4681#[target_feature(enable = "avx512f,avx512vl")]
4682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4683#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4685pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4686 unsafe { simd_select_bitmask(m:k, yes:_mm_fmsubadd_pd(a, b, c), no:c) }
4687}
4688
4689/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4690///
4691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4692#[inline]
4693#[target_feature(enable = "avx512f")]
4694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4695#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4698 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4709pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4710 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:a) }
4711}
4712
4713/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4714///
4715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4716#[inline]
4717#[target_feature(enable = "avx512f")]
4718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4719#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4721pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4722 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:_mm512_setzero_ps()) }
4723}
4724
4725/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4726///
4727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4728#[inline]
4729#[target_feature(enable = "avx512f")]
4730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4731#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4733pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4734 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_ps(a, b, c), no:c) }
4735}
4736
4737/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4738///
4739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4740#[inline]
4741#[target_feature(enable = "avx512f,avx512vl")]
4742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4743#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4746 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:a) }
4747}
4748
4749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4750///
4751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4752#[inline]
4753#[target_feature(enable = "avx512f,avx512vl")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4757pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4758 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:_mm256_setzero_ps()) }
4759}
4760
4761/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4762///
4763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4764#[inline]
4765#[target_feature(enable = "avx512f,avx512vl")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4769pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4770 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_ps(a, b, c), no:c) }
4771}
4772
4773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4774///
4775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4776#[inline]
4777#[target_feature(enable = "avx512f,avx512vl")]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4781pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4782 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:a) }
4783}
4784
4785/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4786///
4787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4788#[inline]
4789#[target_feature(enable = "avx512f,avx512vl")]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4793pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4794 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:_mm_setzero_ps()) }
4795}
4796
4797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4800#[inline]
4801#[target_feature(enable = "avx512f,avx512vl")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4806 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_ps(a, b, c), no:c) }
4807}
4808
4809/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4810///
4811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4812#[inline]
4813#[target_feature(enable = "avx512f")]
4814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4815#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4817pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4818 unsafe { simd_fma(x:simd_neg(a), y:b, z:c) }
4819}
4820
4821/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4822///
4823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4824#[inline]
4825#[target_feature(enable = "avx512f")]
4826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4827#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4829pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4830 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4836#[inline]
4837#[target_feature(enable = "avx512f")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4841pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4842 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:_mm512_setzero_pd()) }
4843}
4844
4845/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4846///
4847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4848#[inline]
4849#[target_feature(enable = "avx512f")]
4850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4851#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4853pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4854 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmadd_pd(a, b, c), no:c) }
4855}
4856
4857/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4858///
4859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4860#[inline]
4861#[target_feature(enable = "avx512f,avx512vl")]
4862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4863#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4865pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4866 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:a) }
4867}
4868
4869/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4870///
4871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4872#[inline]
4873#[target_feature(enable = "avx512f,avx512vl")]
4874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4875#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4877pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4878 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:_mm256_setzero_pd()) }
4879}
4880
4881/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4882///
4883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4884#[inline]
4885#[target_feature(enable = "avx512f,avx512vl")]
4886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4887#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4889pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4890 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmadd_pd(a, b, c), no:c) }
4891}
4892
4893/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4894///
4895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4896#[inline]
4897#[target_feature(enable = "avx512f,avx512vl")]
4898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4899#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4901pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4902 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:a) }
4903}
4904
4905/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4906///
4907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4908#[inline]
4909#[target_feature(enable = "avx512f,avx512vl")]
4910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4911#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4913pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4914 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:_mm_setzero_pd()) }
4915}
4916
4917/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4918///
4919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4920#[inline]
4921#[target_feature(enable = "avx512f,avx512vl")]
4922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4923#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4925pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4926 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmadd_pd(a, b, c), no:c) }
4927}
4928
4929/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4930///
4931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4932#[inline]
4933#[target_feature(enable = "avx512f")]
4934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4935#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4937pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4938 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4939}
4940
4941/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4942///
4943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4944#[inline]
4945#[target_feature(enable = "avx512f")]
4946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4947#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4949pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4950 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:a) }
4951}
4952
4953/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4954///
4955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4956#[inline]
4957#[target_feature(enable = "avx512f")]
4958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4959#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4961pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4962 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4963}
4964
4965/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4968#[inline]
4969#[target_feature(enable = "avx512f")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4973pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4974 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:c) }
4975}
4976
4977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4978///
4979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4980#[inline]
4981#[target_feature(enable = "avx512f,avx512vl")]
4982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4983#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4985pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4986 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:a) }
4987}
4988
4989/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4990///
4991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4992#[inline]
4993#[target_feature(enable = "avx512f,avx512vl")]
4994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4995#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4997pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4998 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4999}
5000
5001/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5002///
5003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
5004#[inline]
5005#[target_feature(enable = "avx512f,avx512vl")]
5006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5007#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
5010 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:c) }
5011}
5012
5013/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5014///
5015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
5016#[inline]
5017#[target_feature(enable = "avx512f,avx512vl")]
5018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5019#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5021pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
5022 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:a) }
5023}
5024
5025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5026///
5027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
5028#[inline]
5029#[target_feature(enable = "avx512f,avx512vl")]
5030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5031#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5033pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
5034 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:_mm_setzero_ps()) }
5035}
5036
5037/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5038///
5039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
5040#[inline]
5041#[target_feature(enable = "avx512f,avx512vl")]
5042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5043#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5045pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
5046 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:c) }
5047}
5048
5049/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
5050///
5051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
5052#[inline]
5053#[target_feature(enable = "avx512f")]
5054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5055#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5057pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5058 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
5059}
5060
5061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5062///
5063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
5064#[inline]
5065#[target_feature(enable = "avx512f")]
5066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5067#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5069pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
5070 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:a) }
5071}
5072
5073/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5074///
5075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
5076#[inline]
5077#[target_feature(enable = "avx512f")]
5078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5079#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5081pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
5082 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
5083}
5084
5085/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5086///
5087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
5088#[inline]
5089#[target_feature(enable = "avx512f")]
5090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5091#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5093pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
5094 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:c) }
5095}
5096
5097/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
5100#[inline]
5101#[target_feature(enable = "avx512f,avx512vl")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5105pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
5106 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:a) }
5107}
5108
5109/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5110///
5111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
5112#[inline]
5113#[target_feature(enable = "avx512f,avx512vl")]
5114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5115#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5117pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
5118 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:_mm256_setzero_pd()) }
5119}
5120
5121/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5122///
5123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
5124#[inline]
5125#[target_feature(enable = "avx512f,avx512vl")]
5126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5127#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5129pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
5130 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_pd(a, b, c), no:c) }
5131}
5132
5133/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
5134///
5135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
5136#[inline]
5137#[target_feature(enable = "avx512f,avx512vl")]
5138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5139#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5141pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
5142 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:a) }
5143}
5144
5145/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5146///
5147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
5148#[inline]
5149#[target_feature(enable = "avx512f,avx512vl")]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5153pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
5154 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:_mm_setzero_pd()) }
5155}
5156
5157/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
5158///
5159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
5160#[inline]
5161#[target_feature(enable = "avx512f,avx512vl")]
5162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5163#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5164#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5165pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
5166 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_pd(a, b, c), no:c) }
5167}
5168
5169/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5170///
5171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
5172#[inline]
5173#[target_feature(enable = "avx512f")]
5174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5175#[cfg_attr(test, assert_instr(vrcp14ps))]
5176pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
5177 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src:f32x16::ZERO, m:0b11111111_11111111)) }
5178}
5179
5180/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5181///
5182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
5183#[inline]
5184#[target_feature(enable = "avx512f")]
5185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5186#[cfg_attr(test, assert_instr(vrcp14ps))]
5187pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5188 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src.as_f32x16(), m:k)) }
5189}
5190
5191/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5192///
5193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
5194#[inline]
5195#[target_feature(enable = "avx512f")]
5196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5197#[cfg_attr(test, assert_instr(vrcp14ps))]
5198pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
5199 unsafe { transmute(src:vrcp14ps(a.as_f32x16(), src:f32x16::ZERO, m:k)) }
5200}
5201
5202/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5203///
5204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
5205#[inline]
5206#[target_feature(enable = "avx512f,avx512vl")]
5207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5208#[cfg_attr(test, assert_instr(vrcp14ps))]
5209pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
5210 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5211}
5212
5213/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5214///
5215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
5216#[inline]
5217#[target_feature(enable = "avx512f,avx512vl")]
5218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5219#[cfg_attr(test, assert_instr(vrcp14ps))]
5220pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5221 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5222}
5223
5224/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5225///
5226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
5227#[inline]
5228#[target_feature(enable = "avx512f,avx512vl")]
5229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5230#[cfg_attr(test, assert_instr(vrcp14ps))]
5231pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
5232 unsafe { transmute(src:vrcp14ps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5233}
5234
5235/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5236///
5237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
5238#[inline]
5239#[target_feature(enable = "avx512f,avx512vl")]
5240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5241#[cfg_attr(test, assert_instr(vrcp14ps))]
5242pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
5243 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5244}
5245
5246/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5247///
5248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
5249#[inline]
5250#[target_feature(enable = "avx512f,avx512vl")]
5251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5252#[cfg_attr(test, assert_instr(vrcp14ps))]
5253pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5254 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5255}
5256
5257/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5258///
5259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
5260#[inline]
5261#[target_feature(enable = "avx512f,avx512vl")]
5262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5263#[cfg_attr(test, assert_instr(vrcp14ps))]
5264pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
5265 unsafe { transmute(src:vrcp14ps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5266}
5267
5268/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5269///
5270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
5271#[inline]
5272#[target_feature(enable = "avx512f")]
5273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5274#[cfg_attr(test, assert_instr(vrcp14pd))]
5275pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
5276 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src:f64x8::ZERO, m:0b11111111)) }
5277}
5278
5279/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5282#[inline]
5283#[target_feature(enable = "avx512f")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[cfg_attr(test, assert_instr(vrcp14pd))]
5286pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5287 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src.as_f64x8(), m:k)) }
5288}
5289
5290/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5293#[inline]
5294#[target_feature(enable = "avx512f")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[cfg_attr(test, assert_instr(vrcp14pd))]
5297pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5298 unsafe { transmute(src:vrcp14pd(a.as_f64x8(), src:f64x8::ZERO, m:k)) }
5299}
5300
5301/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5302///
5303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5304#[inline]
5305#[target_feature(enable = "avx512f,avx512vl")]
5306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5307#[cfg_attr(test, assert_instr(vrcp14pd))]
5308pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5309 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5310}
5311
5312/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5313///
5314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5315#[inline]
5316#[target_feature(enable = "avx512f,avx512vl")]
5317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5318#[cfg_attr(test, assert_instr(vrcp14pd))]
5319pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5320 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5321}
5322
5323/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5324///
5325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5326#[inline]
5327#[target_feature(enable = "avx512f,avx512vl")]
5328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5329#[cfg_attr(test, assert_instr(vrcp14pd))]
5330pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5331 unsafe { transmute(src:vrcp14pd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5332}
5333
5334/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5335///
5336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5337#[inline]
5338#[target_feature(enable = "avx512f,avx512vl")]
5339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5340#[cfg_attr(test, assert_instr(vrcp14pd))]
5341pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5342 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5343}
5344
5345/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5346///
5347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5348#[inline]
5349#[target_feature(enable = "avx512f,avx512vl")]
5350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5351#[cfg_attr(test, assert_instr(vrcp14pd))]
5352pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5353 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5354}
5355
5356/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5357///
5358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5359#[inline]
5360#[target_feature(enable = "avx512f,avx512vl")]
5361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5362#[cfg_attr(test, assert_instr(vrcp14pd))]
5363pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5364 unsafe { transmute(src:vrcp14pd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5365}
5366
5367/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5368///
5369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5370#[inline]
5371#[target_feature(enable = "avx512f")]
5372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5373#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5374pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5375 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src:f32x16::ZERO, m:0b11111111_11111111)) }
5376}
5377
5378/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5379///
5380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5381#[inline]
5382#[target_feature(enable = "avx512f")]
5383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5384#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5385pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5386 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), m:k)) }
5387}
5388
5389/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5390///
5391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5392#[inline]
5393#[target_feature(enable = "avx512f")]
5394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5395#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5396pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5397 unsafe { transmute(src:vrsqrt14ps(a.as_f32x16(), src:f32x16::ZERO, m:k)) }
5398}
5399
5400/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5401///
5402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5403#[inline]
5404#[target_feature(enable = "avx512f,avx512vl")]
5405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5406#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5407pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5408 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5409}
5410
5411/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5412///
5413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5414#[inline]
5415#[target_feature(enable = "avx512f,avx512vl")]
5416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5417#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5418pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5419 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5420}
5421
5422/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5425#[inline]
5426#[target_feature(enable = "avx512f,avx512vl")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5429pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5430 unsafe { transmute(src:vrsqrt14ps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5431}
5432
5433/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5434///
5435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5436#[inline]
5437#[target_feature(enable = "avx512f,avx512vl")]
5438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5439#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5440pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5441 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5442}
5443
5444/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5445///
5446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5447#[inline]
5448#[target_feature(enable = "avx512f,avx512vl")]
5449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5450#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5451pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5452 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5453}
5454
5455/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5456///
5457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5458#[inline]
5459#[target_feature(enable = "avx512f,avx512vl")]
5460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5461#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5462pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5463 unsafe { transmute(src:vrsqrt14ps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5464}
5465
5466/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5467///
5468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5469#[inline]
5470#[target_feature(enable = "avx512f")]
5471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5472#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5473pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5474 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src:f64x8::ZERO, m:0b11111111)) }
5475}
5476
5477/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5478///
5479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5480#[inline]
5481#[target_feature(enable = "avx512f")]
5482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5483#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5484pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5485 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), m:k)) }
5486}
5487
5488/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5489///
5490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5491#[inline]
5492#[target_feature(enable = "avx512f")]
5493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5494#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5495pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5496 unsafe { transmute(src:vrsqrt14pd(a.as_f64x8(), src:f64x8::ZERO, m:k)) }
5497}
5498
5499/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5500///
5501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5502#[inline]
5503#[target_feature(enable = "avx512f,avx512vl")]
5504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5505#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5506pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5507 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5508}
5509
5510/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5511///
5512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5513#[inline]
5514#[target_feature(enable = "avx512f,avx512vl")]
5515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5516#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5517pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5518 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5519}
5520
5521/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5522///
5523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5524#[inline]
5525#[target_feature(enable = "avx512f,avx512vl")]
5526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5527#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5528pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5529 unsafe { transmute(src:vrsqrt14pd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5530}
5531
5532/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5533///
5534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5535#[inline]
5536#[target_feature(enable = "avx512f,avx512vl")]
5537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5538#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5539pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5540 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5541}
5542
5543/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5544///
5545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5546#[inline]
5547#[target_feature(enable = "avx512f,avx512vl")]
5548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5549#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5550pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5551 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5552}
5553
5554/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5555///
5556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5557#[inline]
5558#[target_feature(enable = "avx512f,avx512vl")]
5559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5560#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5561pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5562 unsafe { transmute(src:vrsqrt14pd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5563}
5564
5565/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5566///
5567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5568#[inline]
5569#[target_feature(enable = "avx512f")]
5570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5571#[cfg_attr(test, assert_instr(vgetexpps))]
5572pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5573 unsafe {
5574 transmute(src:vgetexpps(
5575 a.as_f32x16(),
5576 src:f32x16::ZERO,
5577 m:0b11111111_11111111,
5578 _MM_FROUND_CUR_DIRECTION,
5579 ))
5580 }
5581}
5582
5583/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vgetexpps))]
5590pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5591 unsafe {
5592 transmute(src:vgetexpps(
5593 a.as_f32x16(),
5594 src.as_f32x16(),
5595 m:k,
5596 _MM_FROUND_CUR_DIRECTION,
5597 ))
5598 }
5599}
5600
5601/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5602///
5603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5604#[inline]
5605#[target_feature(enable = "avx512f")]
5606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5607#[cfg_attr(test, assert_instr(vgetexpps))]
5608pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5609 unsafe {
5610 transmute(src:vgetexpps(
5611 a.as_f32x16(),
5612 src:f32x16::ZERO,
5613 m:k,
5614 _MM_FROUND_CUR_DIRECTION,
5615 ))
5616 }
5617}
5618
5619/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5620///
5621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5622#[inline]
5623#[target_feature(enable = "avx512f,avx512vl")]
5624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5625#[cfg_attr(test, assert_instr(vgetexpps))]
5626pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5627 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src:f32x8::ZERO, m:0b11111111)) }
5628}
5629
5630/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5631///
5632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5633#[inline]
5634#[target_feature(enable = "avx512f,avx512vl")]
5635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5636#[cfg_attr(test, assert_instr(vgetexpps))]
5637pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src.as_f32x8(), m:k)) }
5639}
5640
5641/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5642///
5643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5644#[inline]
5645#[target_feature(enable = "avx512f,avx512vl")]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647#[cfg_attr(test, assert_instr(vgetexpps))]
5648pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5649 unsafe { transmute(src:vgetexpps256(a.as_f32x8(), src:f32x8::ZERO, m:k)) }
5650}
5651
5652/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5653///
5654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5655#[inline]
5656#[target_feature(enable = "avx512f,avx512vl")]
5657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5658#[cfg_attr(test, assert_instr(vgetexpps))]
5659pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5660 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src:f32x4::ZERO, m:0b00001111)) }
5661}
5662
5663/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5664///
5665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5666#[inline]
5667#[target_feature(enable = "avx512f,avx512vl")]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669#[cfg_attr(test, assert_instr(vgetexpps))]
5670pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5671 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src.as_f32x4(), m:k)) }
5672}
5673
5674/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5675///
5676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5677#[inline]
5678#[target_feature(enable = "avx512f,avx512vl")]
5679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5680#[cfg_attr(test, assert_instr(vgetexpps))]
5681pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5682 unsafe { transmute(src:vgetexpps128(a.as_f32x4(), src:f32x4::ZERO, m:k)) }
5683}
5684
5685/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5686///
5687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5688#[inline]
5689#[target_feature(enable = "avx512f")]
5690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5691#[cfg_attr(test, assert_instr(vgetexppd))]
5692pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5693 unsafe {
5694 transmute(src:vgetexppd(
5695 a.as_f64x8(),
5696 src:f64x8::ZERO,
5697 m:0b11111111,
5698 _MM_FROUND_CUR_DIRECTION,
5699 ))
5700 }
5701}
5702
5703/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5704///
5705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5706#[inline]
5707#[target_feature(enable = "avx512f")]
5708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5709#[cfg_attr(test, assert_instr(vgetexppd))]
5710pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5711 unsafe {
5712 transmute(src:vgetexppd(
5713 a.as_f64x8(),
5714 src.as_f64x8(),
5715 m:k,
5716 _MM_FROUND_CUR_DIRECTION,
5717 ))
5718 }
5719}
5720
5721/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5722///
5723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5724#[inline]
5725#[target_feature(enable = "avx512f")]
5726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5727#[cfg_attr(test, assert_instr(vgetexppd))]
5728pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5729 unsafe {
5730 transmute(src:vgetexppd(
5731 a.as_f64x8(),
5732 src:f64x8::ZERO,
5733 m:k,
5734 _MM_FROUND_CUR_DIRECTION,
5735 ))
5736 }
5737}
5738
5739/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5740///
5741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5742#[inline]
5743#[target_feature(enable = "avx512f,avx512vl")]
5744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5745#[cfg_attr(test, assert_instr(vgetexppd))]
5746pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5747 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src:f64x4::ZERO, m:0b00001111)) }
5748}
5749
5750/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5751///
5752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5753#[inline]
5754#[target_feature(enable = "avx512f,avx512vl")]
5755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5756#[cfg_attr(test, assert_instr(vgetexppd))]
5757pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5758 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src.as_f64x4(), m:k)) }
5759}
5760
5761/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5762///
5763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5764#[inline]
5765#[target_feature(enable = "avx512f,avx512vl")]
5766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5767#[cfg_attr(test, assert_instr(vgetexppd))]
5768pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5769 unsafe { transmute(src:vgetexppd256(a.as_f64x4(), src:f64x4::ZERO, m:k)) }
5770}
5771
5772/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5773///
5774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5775#[inline]
5776#[target_feature(enable = "avx512f,avx512vl")]
5777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5778#[cfg_attr(test, assert_instr(vgetexppd))]
5779pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5780 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src:f64x2::ZERO, m:0b00000011)) }
5781}
5782
5783/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5784///
5785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5786#[inline]
5787#[target_feature(enable = "avx512f,avx512vl")]
5788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5789#[cfg_attr(test, assert_instr(vgetexppd))]
5790pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5791 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src.as_f64x2(), m:k)) }
5792}
5793
5794/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5795///
5796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5797#[inline]
5798#[target_feature(enable = "avx512f,avx512vl")]
5799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5800#[cfg_attr(test, assert_instr(vgetexppd))]
5801pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5802 unsafe { transmute(src:vgetexppd128(a.as_f64x2(), src:f64x2::ZERO, m:k)) }
5803}
5804
5805/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5806/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5807/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5808/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5809/// * [`_MM_FROUND_TO_POS_INF`] : round up
5810/// * [`_MM_FROUND_TO_ZERO`] : truncate
5811/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5812///
5813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5814#[inline]
5815#[target_feature(enable = "avx512f")]
5816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5817#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5818#[rustc_legacy_const_generics(1)]
5819pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5820 unsafe {
5821 static_assert_uimm_bits!(IMM8, 8);
5822 let a: Simd = a.as_f32x16();
5823 let r: Simd = vrndscaleps(
5824 a,
5825 IMM8,
5826 src:f32x16::ZERO,
5827 mask:0b11111111_11111111,
5828 _MM_FROUND_CUR_DIRECTION,
5829 );
5830 transmute(src:r)
5831 }
5832}
5833
5834/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5835/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5836/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5837/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5838/// * [`_MM_FROUND_TO_POS_INF`] : round up
5839/// * [`_MM_FROUND_TO_ZERO`] : truncate
5840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5841///
5842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5843#[inline]
5844#[target_feature(enable = "avx512f")]
5845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5846#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5847#[rustc_legacy_const_generics(3)]
5848pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5849 unsafe {
5850 static_assert_uimm_bits!(IMM8, 8);
5851 let a: Simd = a.as_f32x16();
5852 let src: Simd = src.as_f32x16();
5853 let r: Simd = vrndscaleps(a, IMM8, src, mask:k, _MM_FROUND_CUR_DIRECTION);
5854 transmute(src:r)
5855 }
5856}
5857
5858/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5859/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5860/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5861/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5862/// * [`_MM_FROUND_TO_POS_INF`] : round up
5863/// * [`_MM_FROUND_TO_ZERO`] : truncate
5864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5865///
5866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5867#[inline]
5868#[target_feature(enable = "avx512f")]
5869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5870#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5871#[rustc_legacy_const_generics(2)]
5872pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5873 unsafe {
5874 static_assert_uimm_bits!(IMM8, 8);
5875 let a: Simd = a.as_f32x16();
5876 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:k, _MM_FROUND_CUR_DIRECTION);
5877 transmute(src:r)
5878 }
5879}
5880
5881/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5882/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5883/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5884/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5885/// * [`_MM_FROUND_TO_POS_INF`] : round up
5886/// * [`_MM_FROUND_TO_ZERO`] : truncate
5887/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5888///
5889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5890#[inline]
5891#[target_feature(enable = "avx512f,avx512vl")]
5892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5893#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5894#[rustc_legacy_const_generics(1)]
5895pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5896 unsafe {
5897 static_assert_uimm_bits!(IMM8, 8);
5898 let a: Simd = a.as_f32x8();
5899 let r: Simd = vrndscaleps256(a, IMM8, src:f32x8::ZERO, mask:0b11111111);
5900 transmute(src:r)
5901 }
5902}
5903
5904/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5905/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5906/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5907/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5908/// * [`_MM_FROUND_TO_POS_INF`] : round up
5909/// * [`_MM_FROUND_TO_ZERO`] : truncate
5910/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5911///
5912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5913#[inline]
5914#[target_feature(enable = "avx512f,avx512vl")]
5915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5916#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5917#[rustc_legacy_const_generics(3)]
5918pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5919 unsafe {
5920 static_assert_uimm_bits!(IMM8, 8);
5921 let a: Simd = a.as_f32x8();
5922 let src: Simd = src.as_f32x8();
5923 let r: Simd = vrndscaleps256(a, IMM8, src, mask:k);
5924 transmute(src:r)
5925 }
5926}
5927
5928/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5929/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5930/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5931/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5932/// * [`_MM_FROUND_TO_POS_INF`] : round up
5933/// * [`_MM_FROUND_TO_ZERO`] : truncate
5934/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5935///
5936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5937#[inline]
5938#[target_feature(enable = "avx512f,avx512vl")]
5939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5940#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5941#[rustc_legacy_const_generics(2)]
5942pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5943 unsafe {
5944 static_assert_uimm_bits!(IMM8, 8);
5945 let a: Simd = a.as_f32x8();
5946 let r: Simd = vrndscaleps256(a, IMM8, src:f32x8::ZERO, mask:k);
5947 transmute(src:r)
5948 }
5949}
5950
5951/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5952/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5953/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5954/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5955/// * [`_MM_FROUND_TO_POS_INF`] : round up
5956/// * [`_MM_FROUND_TO_ZERO`] : truncate
5957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5958///
5959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5960#[inline]
5961#[target_feature(enable = "avx512f,avx512vl")]
5962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5963#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5964#[rustc_legacy_const_generics(1)]
5965pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5966 unsafe {
5967 static_assert_uimm_bits!(IMM8, 8);
5968 let a: Simd = a.as_f32x4();
5969 let r: Simd = vrndscaleps128(a, IMM8, src:f32x4::ZERO, mask:0b00001111);
5970 transmute(src:r)
5971 }
5972}
5973
5974/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5975/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5976/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5977/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5978/// * [`_MM_FROUND_TO_POS_INF`] : round up
5979/// * [`_MM_FROUND_TO_ZERO`] : truncate
5980/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5981///
5982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5983#[inline]
5984#[target_feature(enable = "avx512f,avx512vl")]
5985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5986#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5987#[rustc_legacy_const_generics(3)]
5988pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5989 unsafe {
5990 static_assert_uimm_bits!(IMM8, 8);
5991 let a: Simd = a.as_f32x4();
5992 let src: Simd = src.as_f32x4();
5993 let r: Simd = vrndscaleps128(a, IMM8, src, mask:k);
5994 transmute(src:r)
5995 }
5996}
5997
5998/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5999/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6000/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6001/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6002/// * [`_MM_FROUND_TO_POS_INF`] : round up
6003/// * [`_MM_FROUND_TO_ZERO`] : truncate
6004/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6005///
6006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
6007#[inline]
6008#[target_feature(enable = "avx512f,avx512vl")]
6009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6010#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
6011#[rustc_legacy_const_generics(2)]
6012pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6013 unsafe {
6014 static_assert_uimm_bits!(IMM8, 8);
6015 let a: Simd = a.as_f32x4();
6016 let r: Simd = vrndscaleps128(a, IMM8, src:f32x4::ZERO, mask:k);
6017 transmute(src:r)
6018 }
6019}
6020
6021/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6022/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6023/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6024/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6025/// * [`_MM_FROUND_TO_POS_INF`] : round up
6026/// * [`_MM_FROUND_TO_ZERO`] : truncate
6027/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6028///
6029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
6030#[inline]
6031#[target_feature(enable = "avx512f")]
6032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6033#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6034#[rustc_legacy_const_generics(1)]
6035pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6036 unsafe {
6037 static_assert_uimm_bits!(IMM8, 8);
6038 let a: Simd = a.as_f64x8();
6039 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
6040 transmute(src:r)
6041 }
6042}
6043
6044/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6045/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6046/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6047/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6048/// * [`_MM_FROUND_TO_POS_INF`] : round up
6049/// * [`_MM_FROUND_TO_ZERO`] : truncate
6050/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6051///
6052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
6053#[inline]
6054#[target_feature(enable = "avx512f")]
6055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6056#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6057#[rustc_legacy_const_generics(3)]
6058pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
6059 src: __m512d,
6060 k: __mmask8,
6061 a: __m512d,
6062) -> __m512d {
6063 unsafe {
6064 static_assert_uimm_bits!(IMM8, 8);
6065 let a: Simd = a.as_f64x8();
6066 let src: Simd = src.as_f64x8();
6067 let r: Simd = vrndscalepd(a, IMM8, src, mask:k, _MM_FROUND_CUR_DIRECTION);
6068 transmute(src:r)
6069 }
6070}
6071
6072/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6073/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6074/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6075/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6076/// * [`_MM_FROUND_TO_POS_INF`] : round up
6077/// * [`_MM_FROUND_TO_ZERO`] : truncate
6078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6079///
6080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
6081#[inline]
6082#[target_feature(enable = "avx512f")]
6083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6084#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6085#[rustc_legacy_const_generics(2)]
6086pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6087 unsafe {
6088 static_assert_uimm_bits!(IMM8, 8);
6089 let a: Simd = a.as_f64x8();
6090 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:k, _MM_FROUND_CUR_DIRECTION);
6091 transmute(src:r)
6092 }
6093}
6094
6095/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6096/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6097/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6098/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6099/// * [`_MM_FROUND_TO_POS_INF`] : round up
6100/// * [`_MM_FROUND_TO_ZERO`] : truncate
6101/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6102///
6103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
6104#[inline]
6105#[target_feature(enable = "avx512f,avx512vl")]
6106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6107#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6108#[rustc_legacy_const_generics(1)]
6109pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6110 unsafe {
6111 static_assert_uimm_bits!(IMM8, 8);
6112 let a: Simd = a.as_f64x4();
6113 let r: Simd = vrndscalepd256(a, IMM8, src:f64x4::ZERO, mask:0b00001111);
6114 transmute(src:r)
6115 }
6116}
6117
6118/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6119/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6120/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6121/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6122/// * [`_MM_FROUND_TO_POS_INF`] : round up
6123/// * [`_MM_FROUND_TO_ZERO`] : truncate
6124/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6125///
6126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
6127#[inline]
6128#[target_feature(enable = "avx512f,avx512vl")]
6129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6130#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6131#[rustc_legacy_const_generics(3)]
6132pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
6133 src: __m256d,
6134 k: __mmask8,
6135 a: __m256d,
6136) -> __m256d {
6137 unsafe {
6138 static_assert_uimm_bits!(IMM8, 8);
6139 let a: Simd = a.as_f64x4();
6140 let src: Simd = src.as_f64x4();
6141 let r: Simd = vrndscalepd256(a, IMM8, src, mask:k);
6142 transmute(src:r)
6143 }
6144}
6145
6146/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6147/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6148/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6149/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6150/// * [`_MM_FROUND_TO_POS_INF`] : round up
6151/// * [`_MM_FROUND_TO_ZERO`] : truncate
6152/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6159#[rustc_legacy_const_generics(2)]
6160pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6161 unsafe {
6162 static_assert_uimm_bits!(IMM8, 8);
6163 let a: Simd = a.as_f64x4();
6164 let r: Simd = vrndscalepd256(a, IMM8, src:f64x4::ZERO, mask:k);
6165 transmute(src:r)
6166 }
6167}
6168
6169/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
6170/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6171/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6172/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6173/// * [`_MM_FROUND_TO_POS_INF`] : round up
6174/// * [`_MM_FROUND_TO_ZERO`] : truncate
6175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6176///
6177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
6178#[inline]
6179#[target_feature(enable = "avx512f,avx512vl")]
6180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6181#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
6182#[rustc_legacy_const_generics(1)]
6183pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6184 unsafe {
6185 static_assert_uimm_bits!(IMM8, 8);
6186 let a: Simd = a.as_f64x2();
6187 let r: Simd = vrndscalepd128(a, IMM8, src:f64x2::ZERO, mask:0b00000011);
6188 transmute(src:r)
6189 }
6190}
6191
6192/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
6193/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6194/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6195/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6196/// * [`_MM_FROUND_TO_POS_INF`] : round up
6197/// * [`_MM_FROUND_TO_ZERO`] : truncate
6198/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6199///
6200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
6201#[inline]
6202#[target_feature(enable = "avx512f,avx512vl")]
6203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6204#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6205#[rustc_legacy_const_generics(3)]
6206pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6207 unsafe {
6208 static_assert_uimm_bits!(IMM8, 8);
6209 let a: Simd = a.as_f64x2();
6210 let src: Simd = src.as_f64x2();
6211 let r: Simd = vrndscalepd128(a, IMM8, src, mask:k);
6212 transmute(src:r)
6213 }
6214}
6215
6216/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
6217/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
6218/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6219/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6220/// * [`_MM_FROUND_TO_POS_INF`] : round up
6221/// * [`_MM_FROUND_TO_ZERO`] : truncate
6222/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6223///
6224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
6225#[inline]
6226#[target_feature(enable = "avx512f,avx512vl")]
6227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6228#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
6229#[rustc_legacy_const_generics(2)]
6230pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6231 unsafe {
6232 static_assert_uimm_bits!(IMM8, 8);
6233 let a: Simd = a.as_f64x2();
6234 let r: Simd = vrndscalepd128(a, IMM8, src:f64x2::ZERO, mask:k);
6235 transmute(src:r)
6236 }
6237}
6238
6239/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6240///
6241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
6242#[inline]
6243#[target_feature(enable = "avx512f")]
6244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6245#[cfg_attr(test, assert_instr(vscalefps))]
6246pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
6247 unsafe {
6248 transmute(src:vscalefps(
6249 a.as_f32x16(),
6250 b.as_f32x16(),
6251 src:f32x16::ZERO,
6252 mask:0b11111111_11111111,
6253 _MM_FROUND_CUR_DIRECTION,
6254 ))
6255 }
6256}
6257
6258/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
6261#[inline]
6262#[target_feature(enable = "avx512f")]
6263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6264#[cfg_attr(test, assert_instr(vscalefps))]
6265pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
6266 unsafe {
6267 transmute(src:vscalefps(
6268 a.as_f32x16(),
6269 b.as_f32x16(),
6270 src.as_f32x16(),
6271 mask:k,
6272 _MM_FROUND_CUR_DIRECTION,
6273 ))
6274 }
6275}
6276
6277/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6278///
6279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
6280#[inline]
6281#[target_feature(enable = "avx512f")]
6282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6283#[cfg_attr(test, assert_instr(vscalefps))]
6284pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6285 unsafe {
6286 transmute(src:vscalefps(
6287 a.as_f32x16(),
6288 b.as_f32x16(),
6289 src:f32x16::ZERO,
6290 mask:k,
6291 _MM_FROUND_CUR_DIRECTION,
6292 ))
6293 }
6294}
6295
6296/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6297///
6298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6299#[inline]
6300#[target_feature(enable = "avx512f,avx512vl")]
6301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6302#[cfg_attr(test, assert_instr(vscalefps))]
6303pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6304 unsafe {
6305 transmute(src:vscalefps256(
6306 a.as_f32x8(),
6307 b.as_f32x8(),
6308 src:f32x8::ZERO,
6309 mask:0b11111111,
6310 ))
6311 }
6312}
6313
6314/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6315///
6316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6317#[inline]
6318#[target_feature(enable = "avx512f,avx512vl")]
6319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6320#[cfg_attr(test, assert_instr(vscalefps))]
6321pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6322 unsafe { transmute(src:vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), mask:k)) }
6323}
6324
6325/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6326///
6327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6328#[inline]
6329#[target_feature(enable = "avx512f,avx512vl")]
6330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6331#[cfg_attr(test, assert_instr(vscalefps))]
6332pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6333 unsafe { transmute(src:vscalefps256(a.as_f32x8(), b.as_f32x8(), src:f32x8::ZERO, mask:k)) }
6334}
6335
6336/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6337///
6338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6339#[inline]
6340#[target_feature(enable = "avx512f,avx512vl")]
6341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6342#[cfg_attr(test, assert_instr(vscalefps))]
6343pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6344 unsafe {
6345 transmute(src:vscalefps128(
6346 a.as_f32x4(),
6347 b.as_f32x4(),
6348 src:f32x4::ZERO,
6349 mask:0b00001111,
6350 ))
6351 }
6352}
6353
6354/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6357#[inline]
6358#[target_feature(enable = "avx512f,avx512vl")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vscalefps))]
6361pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6362 unsafe { transmute(src:vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
6363}
6364
6365/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6366///
6367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6368#[inline]
6369#[target_feature(enable = "avx512f,avx512vl")]
6370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6371#[cfg_attr(test, assert_instr(vscalefps))]
6372pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6373 unsafe { transmute(src:vscalefps128(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
6374}
6375
6376/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6377///
6378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6379#[inline]
6380#[target_feature(enable = "avx512f")]
6381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6382#[cfg_attr(test, assert_instr(vscalefpd))]
6383pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6384 unsafe {
6385 transmute(src:vscalefpd(
6386 a.as_f64x8(),
6387 b.as_f64x8(),
6388 src:f64x8::ZERO,
6389 mask:0b11111111,
6390 _MM_FROUND_CUR_DIRECTION,
6391 ))
6392 }
6393}
6394
6395/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6396///
6397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6398#[inline]
6399#[target_feature(enable = "avx512f")]
6400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6401#[cfg_attr(test, assert_instr(vscalefpd))]
6402pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6403 unsafe {
6404 transmute(src:vscalefpd(
6405 a.as_f64x8(),
6406 b.as_f64x8(),
6407 src.as_f64x8(),
6408 mask:k,
6409 _MM_FROUND_CUR_DIRECTION,
6410 ))
6411 }
6412}
6413
6414/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6415///
6416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6417#[inline]
6418#[target_feature(enable = "avx512f")]
6419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6420#[cfg_attr(test, assert_instr(vscalefpd))]
6421pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6422 unsafe {
6423 transmute(src:vscalefpd(
6424 a.as_f64x8(),
6425 b.as_f64x8(),
6426 src:f64x8::ZERO,
6427 mask:k,
6428 _MM_FROUND_CUR_DIRECTION,
6429 ))
6430 }
6431}
6432
6433/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6436#[inline]
6437#[target_feature(enable = "avx512f,avx512vl")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vscalefpd))]
6440pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6441 unsafe {
6442 transmute(src:vscalefpd256(
6443 a.as_f64x4(),
6444 b.as_f64x4(),
6445 src:f64x4::ZERO,
6446 mask:0b00001111,
6447 ))
6448 }
6449}
6450
6451/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6452///
6453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6454#[inline]
6455#[target_feature(enable = "avx512f,avx512vl")]
6456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6457#[cfg_attr(test, assert_instr(vscalefpd))]
6458pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6459 unsafe { transmute(src:vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), mask:k)) }
6460}
6461
6462/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6463///
6464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6465#[inline]
6466#[target_feature(enable = "avx512f,avx512vl")]
6467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6468#[cfg_attr(test, assert_instr(vscalefpd))]
6469pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6470 unsafe { transmute(src:vscalefpd256(a.as_f64x4(), b.as_f64x4(), src:f64x4::ZERO, mask:k)) }
6471}
6472
6473/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6474///
6475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6476#[inline]
6477#[target_feature(enable = "avx512f,avx512vl")]
6478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6479#[cfg_attr(test, assert_instr(vscalefpd))]
6480pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6481 unsafe {
6482 transmute(src:vscalefpd128(
6483 a.as_f64x2(),
6484 b.as_f64x2(),
6485 src:f64x2::ZERO,
6486 mask:0b00000011,
6487 ))
6488 }
6489}
6490
6491/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6494#[inline]
6495#[target_feature(enable = "avx512f,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vscalefpd))]
6498pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6499 unsafe { transmute(src:vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
6500}
6501
6502/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6503///
6504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6505#[inline]
6506#[target_feature(enable = "avx512f,avx512vl")]
6507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6508#[cfg_attr(test, assert_instr(vscalefpd))]
6509pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6510 unsafe { transmute(src:vscalefpd128(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
6511}
6512
6513/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6514///
6515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6516#[inline]
6517#[target_feature(enable = "avx512f")]
6518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6519#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6520#[rustc_legacy_const_generics(3)]
6521pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6522 unsafe {
6523 static_assert_uimm_bits!(IMM8, 8);
6524 let a: Simd = a.as_f32x16();
6525 let b: Simd = b.as_f32x16();
6526 let c: Simd = c.as_i32x16();
6527 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6528 transmute(src:r)
6529 }
6530}
6531
6532/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6533///
6534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6535#[inline]
6536#[target_feature(enable = "avx512f")]
6537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6538#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6539#[rustc_legacy_const_generics(4)]
6540pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6541 a: __m512,
6542 k: __mmask16,
6543 b: __m512,
6544 c: __m512i,
6545) -> __m512 {
6546 unsafe {
6547 static_assert_uimm_bits!(IMM8, 8);
6548 let a: Simd = a.as_f32x16();
6549 let b: Simd = b.as_f32x16();
6550 let c: Simd = c.as_i32x16();
6551 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6552 transmute(src:r)
6553 }
6554}
6555
6556/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6557///
6558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6559#[inline]
6560#[target_feature(enable = "avx512f")]
6561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6562#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6563#[rustc_legacy_const_generics(4)]
6564pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6565 k: __mmask16,
6566 a: __m512,
6567 b: __m512,
6568 c: __m512i,
6569) -> __m512 {
6570 unsafe {
6571 static_assert_uimm_bits!(IMM8, 8);
6572 let a: Simd = a.as_f32x16();
6573 let b: Simd = b.as_f32x16();
6574 let c: Simd = c.as_i32x16();
6575 let r: Simd = vfixupimmpsz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6576 transmute(src:r)
6577 }
6578}
6579
6580/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6581///
6582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6583#[inline]
6584#[target_feature(enable = "avx512f,avx512vl")]
6585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6586#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6587#[rustc_legacy_const_generics(3)]
6588pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6589 unsafe {
6590 static_assert_uimm_bits!(IMM8, 8);
6591 let a: Simd = a.as_f32x8();
6592 let b: Simd = b.as_f32x8();
6593 let c: Simd = c.as_i32x8();
6594 let r: Simd = vfixupimmps256(a, b, c, IMM8, mask:0b11111111);
6595 transmute(src:r)
6596 }
6597}
6598
6599/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6600///
6601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6602#[inline]
6603#[target_feature(enable = "avx512f,avx512vl")]
6604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6605#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6606#[rustc_legacy_const_generics(4)]
6607pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6608 a: __m256,
6609 k: __mmask8,
6610 b: __m256,
6611 c: __m256i,
6612) -> __m256 {
6613 unsafe {
6614 static_assert_uimm_bits!(IMM8, 8);
6615 let a: Simd = a.as_f32x8();
6616 let b: Simd = b.as_f32x8();
6617 let c: Simd = c.as_i32x8();
6618 let r: Simd = vfixupimmps256(a, b, c, IMM8, mask:k);
6619 transmute(src:r)
6620 }
6621}
6622
6623/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6624///
6625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6626#[inline]
6627#[target_feature(enable = "avx512f,avx512vl")]
6628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6629#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6630#[rustc_legacy_const_generics(4)]
6631pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6632 k: __mmask8,
6633 a: __m256,
6634 b: __m256,
6635 c: __m256i,
6636) -> __m256 {
6637 unsafe {
6638 static_assert_uimm_bits!(IMM8, 8);
6639 let a: Simd = a.as_f32x8();
6640 let b: Simd = b.as_f32x8();
6641 let c: Simd = c.as_i32x8();
6642 let r: Simd = vfixupimmpsz256(a, b, c, IMM8, mask:k);
6643 transmute(src:r)
6644 }
6645}
6646
6647/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6648///
6649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6650#[inline]
6651#[target_feature(enable = "avx512f,avx512vl")]
6652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6653#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6654#[rustc_legacy_const_generics(3)]
6655pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6656 unsafe {
6657 static_assert_uimm_bits!(IMM8, 8);
6658 let a: Simd = a.as_f32x4();
6659 let b: Simd = b.as_f32x4();
6660 let c: Simd = c.as_i32x4();
6661 let r: Simd = vfixupimmps128(a, b, c, IMM8, mask:0b00001111);
6662 transmute(src:r)
6663 }
6664}
6665
6666/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6667///
6668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6669#[inline]
6670#[target_feature(enable = "avx512f,avx512vl")]
6671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6672#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6673#[rustc_legacy_const_generics(4)]
6674pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6675 a: __m128,
6676 k: __mmask8,
6677 b: __m128,
6678 c: __m128i,
6679) -> __m128 {
6680 unsafe {
6681 static_assert_uimm_bits!(IMM8, 8);
6682 let a: Simd = a.as_f32x4();
6683 let b: Simd = b.as_f32x4();
6684 let c: Simd = c.as_i32x4();
6685 let r: Simd = vfixupimmps128(a, b, c, IMM8, mask:k);
6686 transmute(src:r)
6687 }
6688}
6689
6690/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6691///
6692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6693#[inline]
6694#[target_feature(enable = "avx512f,avx512vl")]
6695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6696#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6697#[rustc_legacy_const_generics(4)]
6698pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6699 k: __mmask8,
6700 a: __m128,
6701 b: __m128,
6702 c: __m128i,
6703) -> __m128 {
6704 unsafe {
6705 static_assert_uimm_bits!(IMM8, 8);
6706 let a: Simd = a.as_f32x4();
6707 let b: Simd = b.as_f32x4();
6708 let c: Simd = c.as_i32x4();
6709 let r: Simd = vfixupimmpsz128(a, b, c, IMM8, mask:k);
6710 transmute(src:r)
6711 }
6712}
6713
6714/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6715///
6716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6717#[inline]
6718#[target_feature(enable = "avx512f")]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6721#[rustc_legacy_const_generics(3)]
6722pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6723 unsafe {
6724 static_assert_uimm_bits!(IMM8, 8);
6725 let a: Simd = a.as_f64x8();
6726 let b: Simd = b.as_f64x8();
6727 let c: Simd = c.as_i64x8();
6728 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
6729 transmute(src:r)
6730 }
6731}
6732
6733/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6734///
6735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6736#[inline]
6737#[target_feature(enable = "avx512f")]
6738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6739#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6740#[rustc_legacy_const_generics(4)]
6741pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6742 a: __m512d,
6743 k: __mmask8,
6744 b: __m512d,
6745 c: __m512i,
6746) -> __m512d {
6747 unsafe {
6748 static_assert_uimm_bits!(IMM8, 8);
6749 let a: Simd = a.as_f64x8();
6750 let b: Simd = b.as_f64x8();
6751 let c: Simd = c.as_i64x8();
6752 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6753 transmute(src:r)
6754 }
6755}
6756
6757/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6758///
6759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6760#[inline]
6761#[target_feature(enable = "avx512f")]
6762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6763#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6764#[rustc_legacy_const_generics(4)]
6765pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6766 k: __mmask8,
6767 a: __m512d,
6768 b: __m512d,
6769 c: __m512i,
6770) -> __m512d {
6771 unsafe {
6772 static_assert_uimm_bits!(IMM8, 8);
6773 let a: Simd = a.as_f64x8();
6774 let b: Simd = b.as_f64x8();
6775 let c: Simd = c.as_i64x8();
6776 let r: Simd = vfixupimmpdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
6777 transmute(src:r)
6778 }
6779}
6780
6781/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6782///
6783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6784#[inline]
6785#[target_feature(enable = "avx512f,avx512vl")]
6786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6787#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6788#[rustc_legacy_const_generics(3)]
6789pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6790 unsafe {
6791 static_assert_uimm_bits!(IMM8, 8);
6792 let a: Simd = a.as_f64x4();
6793 let b: Simd = b.as_f64x4();
6794 let c: Simd = c.as_i64x4();
6795 let r: Simd = vfixupimmpd256(a, b, c, IMM8, mask:0b00001111);
6796 transmute(src:r)
6797 }
6798}
6799
6800/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6801///
6802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6803#[inline]
6804#[target_feature(enable = "avx512f,avx512vl")]
6805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6806#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6807#[rustc_legacy_const_generics(4)]
6808pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6809 a: __m256d,
6810 k: __mmask8,
6811 b: __m256d,
6812 c: __m256i,
6813) -> __m256d {
6814 unsafe {
6815 static_assert_uimm_bits!(IMM8, 8);
6816 let a: Simd = a.as_f64x4();
6817 let b: Simd = b.as_f64x4();
6818 let c: Simd = c.as_i64x4();
6819 let r: Simd = vfixupimmpd256(a, b, c, IMM8, mask:k);
6820 transmute(src:r)
6821 }
6822}
6823
6824/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6825///
6826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6827#[inline]
6828#[target_feature(enable = "avx512f,avx512vl")]
6829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6830#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6831#[rustc_legacy_const_generics(4)]
6832pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6833 k: __mmask8,
6834 a: __m256d,
6835 b: __m256d,
6836 c: __m256i,
6837) -> __m256d {
6838 unsafe {
6839 static_assert_uimm_bits!(IMM8, 8);
6840 let a: Simd = a.as_f64x4();
6841 let b: Simd = b.as_f64x4();
6842 let c: Simd = c.as_i64x4();
6843 let r: Simd = vfixupimmpdz256(a, b, c, IMM8, mask:k);
6844 transmute(src:r)
6845 }
6846}
6847
6848/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6849///
6850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6851#[inline]
6852#[target_feature(enable = "avx512f,avx512vl")]
6853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6854#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6855#[rustc_legacy_const_generics(3)]
6856pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6857 unsafe {
6858 static_assert_uimm_bits!(IMM8, 8);
6859 let a: Simd = a.as_f64x2();
6860 let b: Simd = b.as_f64x2();
6861 let c: Simd = c.as_i64x2();
6862 let r: Simd = vfixupimmpd128(a, b, c, IMM8, mask:0b00000011);
6863 transmute(src:r)
6864 }
6865}
6866
6867/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6868///
6869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6870#[inline]
6871#[target_feature(enable = "avx512f,avx512vl")]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6874#[rustc_legacy_const_generics(4)]
6875pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6876 a: __m128d,
6877 k: __mmask8,
6878 b: __m128d,
6879 c: __m128i,
6880) -> __m128d {
6881 unsafe {
6882 static_assert_uimm_bits!(IMM8, 8);
6883 let a: Simd = a.as_f64x2();
6884 let b: Simd = b.as_f64x2();
6885 let c: Simd = c.as_i64x2();
6886 let r: Simd = vfixupimmpd128(a, b, c, IMM8, mask:k);
6887 transmute(src:r)
6888 }
6889}
6890
6891/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6892///
6893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6894#[inline]
6895#[target_feature(enable = "avx512f,avx512vl")]
6896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6897#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6898#[rustc_legacy_const_generics(4)]
6899pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6900 k: __mmask8,
6901 a: __m128d,
6902 b: __m128d,
6903 c: __m128i,
6904) -> __m128d {
6905 unsafe {
6906 static_assert_uimm_bits!(IMM8, 8);
6907 let a: Simd = a.as_f64x2();
6908 let b: Simd = b.as_f64x2();
6909 let c: Simd = c.as_i64x2();
6910 let r: Simd = vfixupimmpdz128(a, b, c, IMM8, mask:k);
6911 transmute(src:r)
6912 }
6913}
6914
6915/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6916///
6917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6918#[inline]
6919#[target_feature(enable = "avx512f")]
6920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6921#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6922#[rustc_legacy_const_generics(3)]
6923pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6924 unsafe {
6925 static_assert_uimm_bits!(IMM8, 8);
6926 let a: Simd = a.as_i32x16();
6927 let b: Simd = b.as_i32x16();
6928 let c: Simd = c.as_i32x16();
6929 let r: Simd = vpternlogd(a, b, c, IMM8);
6930 transmute(src:r)
6931 }
6932}
6933
6934/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6935///
6936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6937#[inline]
6938#[target_feature(enable = "avx512f")]
6939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6940#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6941#[rustc_legacy_const_generics(4)]
6942pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6943 src: __m512i,
6944 k: __mmask16,
6945 a: __m512i,
6946 b: __m512i,
6947) -> __m512i {
6948 unsafe {
6949 static_assert_uimm_bits!(IMM8, 8);
6950 let src: Simd = src.as_i32x16();
6951 let a: Simd = a.as_i32x16();
6952 let b: Simd = b.as_i32x16();
6953 let r: Simd = vpternlogd(a:src, b:a, c:b, IMM8);
6954 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6955 }
6956}
6957
6958/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6959///
6960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6961#[inline]
6962#[target_feature(enable = "avx512f")]
6963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6964#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6965#[rustc_legacy_const_generics(4)]
6966pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6967 k: __mmask16,
6968 a: __m512i,
6969 b: __m512i,
6970 c: __m512i,
6971) -> __m512i {
6972 unsafe {
6973 static_assert_uimm_bits!(IMM8, 8);
6974 let a: Simd = a.as_i32x16();
6975 let b: Simd = b.as_i32x16();
6976 let c: Simd = c.as_i32x16();
6977 let r: Simd = vpternlogd(a, b, c, IMM8);
6978 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
6979 }
6980}
6981
6982/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6983///
6984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6985#[inline]
6986#[target_feature(enable = "avx512f,avx512vl")]
6987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6988#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6989#[rustc_legacy_const_generics(3)]
6990pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6991 unsafe {
6992 static_assert_uimm_bits!(IMM8, 8);
6993 let a: Simd = a.as_i32x8();
6994 let b: Simd = b.as_i32x8();
6995 let c: Simd = c.as_i32x8();
6996 let r: Simd = vpternlogd256(a, b, c, IMM8);
6997 transmute(src:r)
6998 }
6999}
7000
7001/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7002///
7003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
7004#[inline]
7005#[target_feature(enable = "avx512f,avx512vl")]
7006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7007#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7008#[rustc_legacy_const_generics(4)]
7009pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
7010 src: __m256i,
7011 k: __mmask8,
7012 a: __m256i,
7013 b: __m256i,
7014) -> __m256i {
7015 unsafe {
7016 static_assert_uimm_bits!(IMM8, 8);
7017 let src: Simd = src.as_i32x8();
7018 let a: Simd = a.as_i32x8();
7019 let b: Simd = b.as_i32x8();
7020 let r: Simd = vpternlogd256(a:src, b:a, c:b, IMM8);
7021 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7022 }
7023}
7024
7025/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7026///
7027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
7028#[inline]
7029#[target_feature(enable = "avx512f,avx512vl")]
7030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7031#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7032#[rustc_legacy_const_generics(4)]
7033pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
7034 k: __mmask8,
7035 a: __m256i,
7036 b: __m256i,
7037 c: __m256i,
7038) -> __m256i {
7039 unsafe {
7040 static_assert_uimm_bits!(IMM8, 8);
7041 let a: Simd = a.as_i32x8();
7042 let b: Simd = b.as_i32x8();
7043 let c: Simd = c.as_i32x8();
7044 let r: Simd = vpternlogd256(a, b, c, IMM8);
7045 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
7046 }
7047}
7048
7049/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7050///
7051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
7052#[inline]
7053#[target_feature(enable = "avx512f,avx512vl")]
7054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7055#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7056#[rustc_legacy_const_generics(3)]
7057pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7058 unsafe {
7059 static_assert_uimm_bits!(IMM8, 8);
7060 let a: Simd = a.as_i32x4();
7061 let b: Simd = b.as_i32x4();
7062 let c: Simd = c.as_i32x4();
7063 let r: Simd = vpternlogd128(a, b, c, IMM8);
7064 transmute(src:r)
7065 }
7066}
7067
7068/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
7069///
7070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
7071#[inline]
7072#[target_feature(enable = "avx512f,avx512vl")]
7073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7074#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7075#[rustc_legacy_const_generics(4)]
7076pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
7077 src: __m128i,
7078 k: __mmask8,
7079 a: __m128i,
7080 b: __m128i,
7081) -> __m128i {
7082 unsafe {
7083 static_assert_uimm_bits!(IMM8, 8);
7084 let src: Simd = src.as_i32x4();
7085 let a: Simd = a.as_i32x4();
7086 let b: Simd = b.as_i32x4();
7087 let r: Simd = vpternlogd128(a:src, b:a, c:b, IMM8);
7088 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7089 }
7090}
7091
7092/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
7093///
7094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
7095#[inline]
7096#[target_feature(enable = "avx512f,avx512vl")]
7097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7098#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
7099#[rustc_legacy_const_generics(4)]
7100pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
7101 k: __mmask8,
7102 a: __m128i,
7103 b: __m128i,
7104 c: __m128i,
7105) -> __m128i {
7106 unsafe {
7107 static_assert_uimm_bits!(IMM8, 8);
7108 let a: Simd = a.as_i32x4();
7109 let b: Simd = b.as_i32x4();
7110 let c: Simd = c.as_i32x4();
7111 let r: Simd = vpternlogd128(a, b, c, IMM8);
7112 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
7113 }
7114}
7115
7116/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7123#[rustc_legacy_const_generics(3)]
7124pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
7125 unsafe {
7126 static_assert_uimm_bits!(IMM8, 8);
7127 let a: Simd = a.as_i64x8();
7128 let b: Simd = b.as_i64x8();
7129 let c: Simd = c.as_i64x8();
7130 let r: Simd = vpternlogq(a, b, c, IMM8);
7131 transmute(src:r)
7132 }
7133}
7134
7135/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7136///
7137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
7138#[inline]
7139#[target_feature(enable = "avx512f")]
7140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7141#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7142#[rustc_legacy_const_generics(4)]
7143pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
7144 src: __m512i,
7145 k: __mmask8,
7146 a: __m512i,
7147 b: __m512i,
7148) -> __m512i {
7149 unsafe {
7150 static_assert_uimm_bits!(IMM8, 8);
7151 let src: Simd = src.as_i64x8();
7152 let a: Simd = a.as_i64x8();
7153 let b: Simd = b.as_i64x8();
7154 let r: Simd = vpternlogq(a:src, b:a, c:b, IMM8);
7155 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7156 }
7157}
7158
7159/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7160///
7161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
7162#[inline]
7163#[target_feature(enable = "avx512f")]
7164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7165#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7166#[rustc_legacy_const_generics(4)]
7167pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
7168 k: __mmask8,
7169 a: __m512i,
7170 b: __m512i,
7171 c: __m512i,
7172) -> __m512i {
7173 unsafe {
7174 static_assert_uimm_bits!(IMM8, 8);
7175 let a: Simd = a.as_i64x8();
7176 let b: Simd = b.as_i64x8();
7177 let c: Simd = c.as_i64x8();
7178 let r: Simd = vpternlogq(a, b, c, IMM8);
7179 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
7180 }
7181}
7182
7183/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7184///
7185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
7186#[inline]
7187#[target_feature(enable = "avx512f,avx512vl")]
7188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7189#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7190#[rustc_legacy_const_generics(3)]
7191pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
7192 unsafe {
7193 static_assert_uimm_bits!(IMM8, 8);
7194 let a: Simd = a.as_i64x4();
7195 let b: Simd = b.as_i64x4();
7196 let c: Simd = c.as_i64x4();
7197 let r: Simd = vpternlogq256(a, b, c, IMM8);
7198 transmute(src:r)
7199 }
7200}
7201
7202/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7203///
7204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
7205#[inline]
7206#[target_feature(enable = "avx512f,avx512vl")]
7207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7208#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7209#[rustc_legacy_const_generics(4)]
7210pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
7211 src: __m256i,
7212 k: __mmask8,
7213 a: __m256i,
7214 b: __m256i,
7215) -> __m256i {
7216 unsafe {
7217 static_assert_uimm_bits!(IMM8, 8);
7218 let src: Simd = src.as_i64x4();
7219 let a: Simd = a.as_i64x4();
7220 let b: Simd = b.as_i64x4();
7221 let r: Simd = vpternlogq256(a:src, b:a, c:b, IMM8);
7222 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7223 }
7224}
7225
7226/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
7229#[inline]
7230#[target_feature(enable = "avx512f,avx512vl")]
7231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7232#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7233#[rustc_legacy_const_generics(4)]
7234pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
7235 k: __mmask8,
7236 a: __m256i,
7237 b: __m256i,
7238 c: __m256i,
7239) -> __m256i {
7240 unsafe {
7241 static_assert_uimm_bits!(IMM8, 8);
7242 let a: Simd = a.as_i64x4();
7243 let b: Simd = b.as_i64x4();
7244 let c: Simd = c.as_i64x4();
7245 let r: Simd = vpternlogq256(a, b, c, IMM8);
7246 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
7247 }
7248}
7249
7250/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
7253#[inline]
7254#[target_feature(enable = "avx512f,avx512vl")]
7255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7256#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7257#[rustc_legacy_const_generics(3)]
7258pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
7259 unsafe {
7260 static_assert_uimm_bits!(IMM8, 8);
7261 let a: Simd = a.as_i64x2();
7262 let b: Simd = b.as_i64x2();
7263 let c: Simd = c.as_i64x2();
7264 let r: Simd = vpternlogq128(a, b, c, IMM8);
7265 transmute(src:r)
7266 }
7267}
7268
7269/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
7270///
7271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
7272#[inline]
7273#[target_feature(enable = "avx512f,avx512vl")]
7274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7275#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7276#[rustc_legacy_const_generics(4)]
7277pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
7278 src: __m128i,
7279 k: __mmask8,
7280 a: __m128i,
7281 b: __m128i,
7282) -> __m128i {
7283 unsafe {
7284 static_assert_uimm_bits!(IMM8, 8);
7285 let src: Simd = src.as_i64x2();
7286 let a: Simd = a.as_i64x2();
7287 let b: Simd = b.as_i64x2();
7288 let r: Simd = vpternlogq128(a:src, b:a, c:b, IMM8);
7289 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7290 }
7291}
7292
7293/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7294///
7295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7296#[inline]
7297#[target_feature(enable = "avx512f,avx512vl")]
7298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7299#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7300#[rustc_legacy_const_generics(4)]
7301pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7302 k: __mmask8,
7303 a: __m128i,
7304 b: __m128i,
7305 c: __m128i,
7306) -> __m128i {
7307 unsafe {
7308 static_assert_uimm_bits!(IMM8, 8);
7309 let a: Simd = a.as_i64x2();
7310 let b: Simd = b.as_i64x2();
7311 let c: Simd = c.as_i64x2();
7312 let r: Simd = vpternlogq128(a, b, c, IMM8);
7313 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
7314 }
7315}
7316
7317/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7318/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7319/// _MM_MANT_NORM_1_2 // interval [1, 2)
7320/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7321/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7322/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7323/// The sign is determined by sc which can take the following values:
7324/// _MM_MANT_SIGN_src // sign = sign(src)
7325/// _MM_MANT_SIGN_zero // sign = 0
7326/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7327///
7328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7329#[inline]
7330#[target_feature(enable = "avx512f")]
7331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7332#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7333#[rustc_legacy_const_generics(1, 2)]
7334pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7335 a: __m512,
7336) -> __m512 {
7337 unsafe {
7338 static_assert_uimm_bits!(NORM, 4);
7339 static_assert_uimm_bits!(SIGN, 2);
7340 let a: Simd = a.as_f32x16();
7341 let zero: Simd = f32x16::ZERO;
7342 let r: Simd = vgetmantps(
7343 a,
7344 SIGN << 2 | NORM,
7345 src:zero,
7346 m:0b11111111_11111111,
7347 _MM_FROUND_CUR_DIRECTION,
7348 );
7349 transmute(src:r)
7350 }
7351}
7352
7353/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7355/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7356/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7357/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7358/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7359/// The sign is determined by sc which can take the following values:\
7360/// _MM_MANT_SIGN_src // sign = sign(src)\
7361/// _MM_MANT_SIGN_zero // sign = 0\
7362/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7363///
7364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7365#[inline]
7366#[target_feature(enable = "avx512f")]
7367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7368#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7369#[rustc_legacy_const_generics(3, 4)]
7370pub fn _mm512_mask_getmant_ps<
7371 const NORM: _MM_MANTISSA_NORM_ENUM,
7372 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7373>(
7374 src: __m512,
7375 k: __mmask16,
7376 a: __m512,
7377) -> __m512 {
7378 unsafe {
7379 static_assert_uimm_bits!(NORM, 4);
7380 static_assert_uimm_bits!(SIGN, 2);
7381 let a: Simd = a.as_f32x16();
7382 let src: Simd = src.as_f32x16();
7383 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7384 transmute(src:r)
7385 }
7386}
7387
7388/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7389/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7390/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7391/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7392/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7393/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7394/// The sign is determined by sc which can take the following values:\
7395/// _MM_MANT_SIGN_src // sign = sign(src)\
7396/// _MM_MANT_SIGN_zero // sign = 0\
7397/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7398///
7399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7400#[inline]
7401#[target_feature(enable = "avx512f")]
7402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7403#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7404#[rustc_legacy_const_generics(2, 3)]
7405pub fn _mm512_maskz_getmant_ps<
7406 const NORM: _MM_MANTISSA_NORM_ENUM,
7407 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7408>(
7409 k: __mmask16,
7410 a: __m512,
7411) -> __m512 {
7412 unsafe {
7413 static_assert_uimm_bits!(NORM, 4);
7414 static_assert_uimm_bits!(SIGN, 2);
7415 let a: Simd = a.as_f32x16();
7416 let r: Simd = vgetmantps(
7417 a,
7418 SIGN << 2 | NORM,
7419 src:f32x16::ZERO,
7420 m:k,
7421 _MM_FROUND_CUR_DIRECTION,
7422 );
7423 transmute(src:r)
7424 }
7425}
7426
7427/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7428/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7429/// _MM_MANT_NORM_1_2 // interval [1, 2)
7430/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7431/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7432/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7433/// The sign is determined by sc which can take the following values:
7434/// _MM_MANT_SIGN_src // sign = sign(src)
7435/// _MM_MANT_SIGN_zero // sign = 0
7436/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7437///
7438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7439#[inline]
7440#[target_feature(enable = "avx512f,avx512vl")]
7441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7442#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7443#[rustc_legacy_const_generics(1, 2)]
7444pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7445 a: __m256,
7446) -> __m256 {
7447 unsafe {
7448 static_assert_uimm_bits!(NORM, 4);
7449 static_assert_uimm_bits!(SIGN, 2);
7450 let a: Simd = a.as_f32x8();
7451 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:0b11111111);
7452 transmute(src:r)
7453 }
7454}
7455
7456/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7457/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7458/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7459/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7460/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7461/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7462/// The sign is determined by sc which can take the following values:\
7463/// _MM_MANT_SIGN_src // sign = sign(src)\
7464/// _MM_MANT_SIGN_zero // sign = 0\
7465/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7466///
7467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7468#[inline]
7469#[target_feature(enable = "avx512f,avx512vl")]
7470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7471#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7472#[rustc_legacy_const_generics(3, 4)]
7473pub fn _mm256_mask_getmant_ps<
7474 const NORM: _MM_MANTISSA_NORM_ENUM,
7475 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7476>(
7477 src: __m256,
7478 k: __mmask8,
7479 a: __m256,
7480) -> __m256 {
7481 unsafe {
7482 static_assert_uimm_bits!(NORM, 4);
7483 static_assert_uimm_bits!(SIGN, 2);
7484 let a: Simd = a.as_f32x8();
7485 let src: Simd = src.as_f32x8();
7486 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src, m:k);
7487 transmute(src:r)
7488 }
7489}
7490
7491/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7492/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7493/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7494/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7495/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7496/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7497/// The sign is determined by sc which can take the following values:\
7498/// _MM_MANT_SIGN_src // sign = sign(src)\
7499/// _MM_MANT_SIGN_zero // sign = 0\
7500/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7501///
7502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7503#[inline]
7504#[target_feature(enable = "avx512f,avx512vl")]
7505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7506#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7507#[rustc_legacy_const_generics(2, 3)]
7508pub fn _mm256_maskz_getmant_ps<
7509 const NORM: _MM_MANTISSA_NORM_ENUM,
7510 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7511>(
7512 k: __mmask8,
7513 a: __m256,
7514) -> __m256 {
7515 unsafe {
7516 static_assert_uimm_bits!(NORM, 4);
7517 static_assert_uimm_bits!(SIGN, 2);
7518 let a: Simd = a.as_f32x8();
7519 let r: Simd = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:k);
7520 transmute(src:r)
7521 }
7522}
7523
7524/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7525/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7526/// _MM_MANT_NORM_1_2 // interval [1, 2)
7527/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7528/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7529/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7530/// The sign is determined by sc which can take the following values:
7531/// _MM_MANT_SIGN_src // sign = sign(src)
7532/// _MM_MANT_SIGN_zero // sign = 0
7533/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7534///
7535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7536#[inline]
7537#[target_feature(enable = "avx512f,avx512vl")]
7538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7539#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7540#[rustc_legacy_const_generics(1, 2)]
7541pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7542 a: __m128,
7543) -> __m128 {
7544 unsafe {
7545 static_assert_uimm_bits!(NORM, 4);
7546 static_assert_uimm_bits!(SIGN, 2);
7547 let a: Simd = a.as_f32x4();
7548 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b00001111);
7549 transmute(src:r)
7550 }
7551}
7552
7553/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7554/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7555/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7556/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7557/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7558/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7559/// The sign is determined by sc which can take the following values:\
7560/// _MM_MANT_SIGN_src // sign = sign(src)\
7561/// _MM_MANT_SIGN_zero // sign = 0\
7562/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7563///
7564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7565#[inline]
7566#[target_feature(enable = "avx512f,avx512vl")]
7567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7568#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7569#[rustc_legacy_const_generics(3, 4)]
7570pub fn _mm_mask_getmant_ps<
7571 const NORM: _MM_MANTISSA_NORM_ENUM,
7572 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7573>(
7574 src: __m128,
7575 k: __mmask8,
7576 a: __m128,
7577) -> __m128 {
7578 unsafe {
7579 static_assert_uimm_bits!(NORM, 4);
7580 static_assert_uimm_bits!(SIGN, 2);
7581 let a: Simd = a.as_f32x4();
7582 let src: Simd = src.as_f32x4();
7583 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src, m:k);
7584 transmute(src:r)
7585 }
7586}
7587
7588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7594/// The sign is determined by sc which can take the following values:\
7595/// _MM_MANT_SIGN_src // sign = sign(src)\
7596/// _MM_MANT_SIGN_zero // sign = 0\
7597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7598///
7599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7600#[inline]
7601#[target_feature(enable = "avx512f,avx512vl")]
7602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7603#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7604#[rustc_legacy_const_generics(2, 3)]
7605pub fn _mm_maskz_getmant_ps<
7606 const NORM: _MM_MANTISSA_NORM_ENUM,
7607 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7608>(
7609 k: __mmask8,
7610 a: __m128,
7611) -> __m128 {
7612 unsafe {
7613 static_assert_uimm_bits!(NORM, 4);
7614 static_assert_uimm_bits!(SIGN, 2);
7615 let a: Simd = a.as_f32x4();
7616 let r: Simd = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:k);
7617 transmute(src:r)
7618 }
7619}
7620
7621/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7622/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7623/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7624/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7625/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7626/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7627/// The sign is determined by sc which can take the following values:\
7628/// _MM_MANT_SIGN_src // sign = sign(src)\
7629/// _MM_MANT_SIGN_zero // sign = 0\
7630/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7631///
7632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7633#[inline]
7634#[target_feature(enable = "avx512f")]
7635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7636#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7637#[rustc_legacy_const_generics(1, 2)]
7638pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7639 a: __m512d,
7640) -> __m512d {
7641 unsafe {
7642 static_assert_uimm_bits!(NORM, 4);
7643 static_assert_uimm_bits!(SIGN, 2);
7644 let a: Simd = a.as_f64x8();
7645 let zero: Simd = f64x8::ZERO;
7646 let r: Simd = vgetmantpd(
7647 a,
7648 SIGN << 2 | NORM,
7649 src:zero,
7650 m:0b11111111,
7651 _MM_FROUND_CUR_DIRECTION,
7652 );
7653 transmute(src:r)
7654 }
7655}
7656
7657/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7658/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7659/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7660/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7661/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7662/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7663/// The sign is determined by sc which can take the following values:\
7664/// _MM_MANT_SIGN_src // sign = sign(src)\
7665/// _MM_MANT_SIGN_zero // sign = 0\
7666/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7669#[inline]
7670#[target_feature(enable = "avx512f")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7673#[rustc_legacy_const_generics(3, 4)]
7674pub fn _mm512_mask_getmant_pd<
7675 const NORM: _MM_MANTISSA_NORM_ENUM,
7676 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7677>(
7678 src: __m512d,
7679 k: __mmask8,
7680 a: __m512d,
7681) -> __m512d {
7682 unsafe {
7683 static_assert_uimm_bits!(NORM, 4);
7684 static_assert_uimm_bits!(SIGN, 2);
7685 let a: Simd = a.as_f64x8();
7686 let src: Simd = src.as_f64x8();
7687 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7688 transmute(src:r)
7689 }
7690}
7691
7692/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7693/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7694/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7695/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7696/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7697/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7698/// The sign is determined by sc which can take the following values:\
7699/// _MM_MANT_SIGN_src // sign = sign(src)\
7700/// _MM_MANT_SIGN_zero // sign = 0\
7701/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7702///
7703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7704#[inline]
7705#[target_feature(enable = "avx512f")]
7706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7707#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7708#[rustc_legacy_const_generics(2, 3)]
7709pub fn _mm512_maskz_getmant_pd<
7710 const NORM: _MM_MANTISSA_NORM_ENUM,
7711 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7712>(
7713 k: __mmask8,
7714 a: __m512d,
7715) -> __m512d {
7716 unsafe {
7717 static_assert_uimm_bits!(NORM, 4);
7718 static_assert_uimm_bits!(SIGN, 2);
7719 let a: Simd = a.as_f64x8();
7720 let r: Simd = vgetmantpd(
7721 a,
7722 SIGN << 2 | NORM,
7723 src:f64x8::ZERO,
7724 m:k,
7725 _MM_FROUND_CUR_DIRECTION,
7726 );
7727 transmute(src:r)
7728 }
7729}
7730
7731/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7732/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7733/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7734/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7735/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7736/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7737/// The sign is determined by sc which can take the following values:\
7738/// _MM_MANT_SIGN_src // sign = sign(src)\
7739/// _MM_MANT_SIGN_zero // sign = 0\
7740/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7743#[inline]
7744#[target_feature(enable = "avx512f,avx512vl")]
7745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7746#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7747#[rustc_legacy_const_generics(1, 2)]
7748pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7749 a: __m256d,
7750) -> __m256d {
7751 unsafe {
7752 static_assert_uimm_bits!(NORM, 4);
7753 static_assert_uimm_bits!(SIGN, 2);
7754 let a: Simd = a.as_f64x4();
7755 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:0b00001111);
7756 transmute(src:r)
7757 }
7758}
7759
7760/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7761/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7762/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7763/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7764/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7765/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7766/// The sign is determined by sc which can take the following values:\
7767/// _MM_MANT_SIGN_src // sign = sign(src)\
7768/// _MM_MANT_SIGN_zero // sign = 0\
7769/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7770///
7771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7772#[inline]
7773#[target_feature(enable = "avx512f,avx512vl")]
7774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7775#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7776#[rustc_legacy_const_generics(3, 4)]
7777pub fn _mm256_mask_getmant_pd<
7778 const NORM: _MM_MANTISSA_NORM_ENUM,
7779 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7780>(
7781 src: __m256d,
7782 k: __mmask8,
7783 a: __m256d,
7784) -> __m256d {
7785 unsafe {
7786 static_assert_uimm_bits!(NORM, 4);
7787 static_assert_uimm_bits!(SIGN, 2);
7788 let a: Simd = a.as_f64x4();
7789 let src: Simd = src.as_f64x4();
7790 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src, m:k);
7791 transmute(src:r)
7792 }
7793}
7794
7795/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7796/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7797/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7798/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7799/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7800/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7801/// The sign is determined by sc which can take the following values:\
7802/// _MM_MANT_SIGN_src // sign = sign(src)\
7803/// _MM_MANT_SIGN_zero // sign = 0\
7804/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7805///
7806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7807#[inline]
7808#[target_feature(enable = "avx512f,avx512vl")]
7809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7810#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7811#[rustc_legacy_const_generics(2, 3)]
7812pub fn _mm256_maskz_getmant_pd<
7813 const NORM: _MM_MANTISSA_NORM_ENUM,
7814 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7815>(
7816 k: __mmask8,
7817 a: __m256d,
7818) -> __m256d {
7819 unsafe {
7820 static_assert_uimm_bits!(NORM, 4);
7821 static_assert_uimm_bits!(SIGN, 2);
7822 let a: Simd = a.as_f64x4();
7823 let r: Simd = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:k);
7824 transmute(src:r)
7825 }
7826}
7827
7828/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7829/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7830/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7831/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7832/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7833/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7834/// The sign is determined by sc which can take the following values:\
7835/// _MM_MANT_SIGN_src // sign = sign(src)\
7836/// _MM_MANT_SIGN_zero // sign = 0\
7837/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7838///
7839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7840#[inline]
7841#[target_feature(enable = "avx512f,avx512vl")]
7842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7843#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7844#[rustc_legacy_const_generics(1, 2)]
7845pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7846 a: __m128d,
7847) -> __m128d {
7848 unsafe {
7849 static_assert_uimm_bits!(NORM, 4);
7850 static_assert_uimm_bits!(SIGN, 2);
7851 let a: Simd = a.as_f64x2();
7852 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b00000011);
7853 transmute(src:r)
7854 }
7855}
7856
7857/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7859/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7860/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7861/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7862/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7863/// The sign is determined by sc which can take the following values:\
7864/// _MM_MANT_SIGN_src // sign = sign(src)\
7865/// _MM_MANT_SIGN_zero // sign = 0\
7866/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7867///
7868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7869#[inline]
7870#[target_feature(enable = "avx512f,avx512vl")]
7871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7872#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7873#[rustc_legacy_const_generics(3, 4)]
7874pub fn _mm_mask_getmant_pd<
7875 const NORM: _MM_MANTISSA_NORM_ENUM,
7876 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7877>(
7878 src: __m128d,
7879 k: __mmask8,
7880 a: __m128d,
7881) -> __m128d {
7882 unsafe {
7883 static_assert_uimm_bits!(NORM, 4);
7884 static_assert_uimm_bits!(SIGN, 2);
7885 let a: Simd = a.as_f64x2();
7886 let src: Simd = src.as_f64x2();
7887 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src, m:k);
7888 transmute(src:r)
7889 }
7890}
7891
7892/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7893/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7894/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7895/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7896/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7897/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7898/// The sign is determined by sc which can take the following values:\
7899/// _MM_MANT_SIGN_src // sign = sign(src)\
7900/// _MM_MANT_SIGN_zero // sign = 0\
7901/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7902///
7903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7904#[inline]
7905#[target_feature(enable = "avx512f,avx512vl")]
7906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7907#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7908#[rustc_legacy_const_generics(2, 3)]
7909pub fn _mm_maskz_getmant_pd<
7910 const NORM: _MM_MANTISSA_NORM_ENUM,
7911 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7912>(
7913 k: __mmask8,
7914 a: __m128d,
7915) -> __m128d {
7916 unsafe {
7917 static_assert_uimm_bits!(NORM, 4);
7918 static_assert_uimm_bits!(SIGN, 2);
7919 let a: Simd = a.as_f64x2();
7920 let r: Simd = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:k);
7921 transmute(src:r)
7922 }
7923}
7924
7925/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7938#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7939#[rustc_legacy_const_generics(2)]
7940pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7941 unsafe {
7942 static_assert_rounding!(ROUNDING);
7943 let a: Simd = a.as_f32x16();
7944 let b: Simd = b.as_f32x16();
7945 let r: Simd = vaddps(a, b, ROUNDING);
7946 transmute(src:r)
7947 }
7948}
7949
7950/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7963#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7964#[rustc_legacy_const_generics(4)]
7965pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7966 src: __m512,
7967 k: __mmask16,
7968 a: __m512,
7969 b: __m512,
7970) -> __m512 {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: Simd = a.as_f32x16();
7974 let b: Simd = b.as_f32x16();
7975 let r: Simd = vaddps(a, b, ROUNDING);
7976 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7977 }
7978}
7979
7980/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(3)]
7995pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7996 k: __mmask16,
7997 a: __m512,
7998 b: __m512,
7999) -> __m512 {
8000 unsafe {
8001 static_assert_rounding!(ROUNDING);
8002 let a: Simd = a.as_f32x16();
8003 let b: Simd = b.as_f32x16();
8004 let r: Simd = vaddps(a, b, ROUNDING);
8005 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8006 }
8007}
8008
8009/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8010///
8011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8017///
8018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
8019#[inline]
8020#[target_feature(enable = "avx512f")]
8021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8022#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8023#[rustc_legacy_const_generics(2)]
8024pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8025 unsafe {
8026 static_assert_rounding!(ROUNDING);
8027 let a: Simd = a.as_f64x8();
8028 let b: Simd = b.as_f64x8();
8029 let r: Simd = vaddpd(a, b, ROUNDING);
8030 transmute(src:r)
8031 }
8032}
8033
8034/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8035///
8036/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8042///
8043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
8044#[inline]
8045#[target_feature(enable = "avx512f")]
8046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8047#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8048#[rustc_legacy_const_generics(4)]
8049pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
8050 src: __m512d,
8051 k: __mmask8,
8052 a: __m512d,
8053 b: __m512d,
8054) -> __m512d {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: Simd = a.as_f64x8();
8058 let b: Simd = b.as_f64x8();
8059 let r: Simd = vaddpd(a, b, ROUNDING);
8060 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8061 }
8062}
8063
8064/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(3)]
8079pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
8080 k: __mmask8,
8081 a: __m512d,
8082 b: __m512d,
8083) -> __m512d {
8084 unsafe {
8085 static_assert_rounding!(ROUNDING);
8086 let a: Simd = a.as_f64x8();
8087 let b: Simd = b.as_f64x8();
8088 let r: Simd = vaddpd(a, b, ROUNDING);
8089 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8090 }
8091}
8092
8093/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8094///
8095/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8101///
8102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
8103#[inline]
8104#[target_feature(enable = "avx512f")]
8105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8106#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8107#[rustc_legacy_const_generics(2)]
8108pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8109 unsafe {
8110 static_assert_rounding!(ROUNDING);
8111 let a: Simd = a.as_f32x16();
8112 let b: Simd = b.as_f32x16();
8113 let r: Simd = vsubps(a, b, ROUNDING);
8114 transmute(src:r)
8115 }
8116}
8117
8118/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8119///
8120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8126///
8127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
8128#[inline]
8129#[target_feature(enable = "avx512f")]
8130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8131#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8132#[rustc_legacy_const_generics(4)]
8133pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
8134 src: __m512,
8135 k: __mmask16,
8136 a: __m512,
8137 b: __m512,
8138) -> __m512 {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: Simd = a.as_f32x16();
8142 let b: Simd = b.as_f32x16();
8143 let r: Simd = vsubps(a, b, ROUNDING);
8144 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8145 }
8146}
8147
8148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(3)]
8163pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
8164 k: __mmask16,
8165 a: __m512,
8166 b: __m512,
8167) -> __m512 {
8168 unsafe {
8169 static_assert_rounding!(ROUNDING);
8170 let a: Simd = a.as_f32x16();
8171 let b: Simd = b.as_f32x16();
8172 let r: Simd = vsubps(a, b, ROUNDING);
8173 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8174 }
8175}
8176
8177/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8178///
8179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8185///
8186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8190#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8191#[rustc_legacy_const_generics(2)]
8192pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8193 unsafe {
8194 static_assert_rounding!(ROUNDING);
8195 let a: Simd = a.as_f64x8();
8196 let b: Simd = b.as_f64x8();
8197 let r: Simd = vsubpd(a, b, ROUNDING);
8198 transmute(src:r)
8199 }
8200}
8201
8202/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8203///
8204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8210///
8211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
8212#[inline]
8213#[target_feature(enable = "avx512f")]
8214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8215#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(4)]
8217pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
8218 src: __m512d,
8219 k: __mmask8,
8220 a: __m512d,
8221 b: __m512d,
8222) -> __m512d {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: Simd = a.as_f64x8();
8226 let b: Simd = b.as_f64x8();
8227 let r: Simd = vsubpd(a, b, ROUNDING);
8228 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8229 }
8230}
8231
8232/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(3)]
8247pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
8248 k: __mmask8,
8249 a: __m512d,
8250 b: __m512d,
8251) -> __m512d {
8252 unsafe {
8253 static_assert_rounding!(ROUNDING);
8254 let a: Simd = a.as_f64x8();
8255 let b: Simd = b.as_f64x8();
8256 let r: Simd = vsubpd(a, b, ROUNDING);
8257 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8258 }
8259}
8260
8261/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
8262///
8263/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8269///
8270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
8271#[inline]
8272#[target_feature(enable = "avx512f")]
8273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8274#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8275#[rustc_legacy_const_generics(2)]
8276pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8277 unsafe {
8278 static_assert_rounding!(ROUNDING);
8279 let a: Simd = a.as_f32x16();
8280 let b: Simd = b.as_f32x16();
8281 let r: Simd = vmulps(a, b, ROUNDING);
8282 transmute(src:r)
8283 }
8284}
8285
8286/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8287///
8288/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8289/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8290/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8291/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8292/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8293/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8296#[inline]
8297#[target_feature(enable = "avx512f")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8300#[rustc_legacy_const_generics(4)]
8301pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8302 src: __m512,
8303 k: __mmask16,
8304 a: __m512,
8305 b: __m512,
8306) -> __m512 {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: Simd = a.as_f32x16();
8310 let b: Simd = b.as_f32x16();
8311 let r: Simd = vmulps(a, b, ROUNDING);
8312 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8313 }
8314}
8315
8316/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(3)]
8331pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8332 k: __mmask16,
8333 a: __m512,
8334 b: __m512,
8335) -> __m512 {
8336 unsafe {
8337 static_assert_rounding!(ROUNDING);
8338 let a: Simd = a.as_f32x16();
8339 let b: Simd = b.as_f32x16();
8340 let r: Simd = vmulps(a, b, ROUNDING);
8341 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8342 }
8343}
8344
8345/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8346///
8347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8355#[inline]
8356#[target_feature(enable = "avx512f")]
8357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8358#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8359#[rustc_legacy_const_generics(2)]
8360pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8361 unsafe {
8362 static_assert_rounding!(ROUNDING);
8363 let a: Simd = a.as_f64x8();
8364 let b: Simd = b.as_f64x8();
8365 let r: Simd = vmulpd(a, b, ROUNDING);
8366 transmute(src:r)
8367 }
8368}
8369
8370/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8371///
8372/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8373/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8374/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8375/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8376/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8378///
8379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8380#[inline]
8381#[target_feature(enable = "avx512f")]
8382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8383#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8384#[rustc_legacy_const_generics(4)]
8385pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8386 src: __m512d,
8387 k: __mmask8,
8388 a: __m512d,
8389 b: __m512d,
8390) -> __m512d {
8391 unsafe {
8392 static_assert_rounding!(ROUNDING);
8393 let a: Simd = a.as_f64x8();
8394 let b: Simd = b.as_f64x8();
8395 let r: Simd = vmulpd(a, b, ROUNDING);
8396 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8397 }
8398}
8399
8400/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8401///
8402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8403/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8404/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8405/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8406/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8408///
8409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8410#[inline]
8411#[target_feature(enable = "avx512f")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8414#[rustc_legacy_const_generics(3)]
8415pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8416 k: __mmask8,
8417 a: __m512d,
8418 b: __m512d,
8419) -> __m512d {
8420 unsafe {
8421 static_assert_rounding!(ROUNDING);
8422 let a: Simd = a.as_f64x8();
8423 let b: Simd = b.as_f64x8();
8424 let r: Simd = vmulpd(a, b, ROUNDING);
8425 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8426 }
8427}
8428
8429/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8430///
8431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8437///
8438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8439#[inline]
8440#[target_feature(enable = "avx512f")]
8441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8442#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8443#[rustc_legacy_const_generics(2)]
8444pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8445 unsafe {
8446 static_assert_rounding!(ROUNDING);
8447 let a: Simd = a.as_f32x16();
8448 let b: Simd = b.as_f32x16();
8449 let r: Simd = vdivps(a, b, ROUNDING);
8450 transmute(src:r)
8451 }
8452}
8453
8454/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8455///
8456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8462///
8463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8464#[inline]
8465#[target_feature(enable = "avx512f")]
8466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8467#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8468#[rustc_legacy_const_generics(4)]
8469pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8470 src: __m512,
8471 k: __mmask16,
8472 a: __m512,
8473 b: __m512,
8474) -> __m512 {
8475 unsafe {
8476 static_assert_rounding!(ROUNDING);
8477 let a: Simd = a.as_f32x16();
8478 let b: Simd = b.as_f32x16();
8479 let r: Simd = vdivps(a, b, ROUNDING);
8480 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8481 }
8482}
8483
8484/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8485///
8486/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8492///
8493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8494#[inline]
8495#[target_feature(enable = "avx512f")]
8496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8497#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8498#[rustc_legacy_const_generics(3)]
8499pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8500 k: __mmask16,
8501 a: __m512,
8502 b: __m512,
8503) -> __m512 {
8504 unsafe {
8505 static_assert_rounding!(ROUNDING);
8506 let a: Simd = a.as_f32x16();
8507 let b: Simd = b.as_f32x16();
8508 let r: Simd = vdivps(a, b, ROUNDING);
8509 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8510 }
8511}
8512
8513/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, =and store the results in dst.\
8514///
8515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8516/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8517/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8518/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8519/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8520/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8521///
8522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8523#[inline]
8524#[target_feature(enable = "avx512f")]
8525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8526#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8527#[rustc_legacy_const_generics(2)]
8528pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8529 unsafe {
8530 static_assert_rounding!(ROUNDING);
8531 let a: Simd = a.as_f64x8();
8532 let b: Simd = b.as_f64x8();
8533 let r: Simd = vdivpd(a, b, ROUNDING);
8534 transmute(src:r)
8535 }
8536}
8537
8538/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8539///
8540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8548#[inline]
8549#[target_feature(enable = "avx512f")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8552#[rustc_legacy_const_generics(4)]
8553pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8554 src: __m512d,
8555 k: __mmask8,
8556 a: __m512d,
8557 b: __m512d,
8558) -> __m512d {
8559 unsafe {
8560 static_assert_rounding!(ROUNDING);
8561 let a: Simd = a.as_f64x8();
8562 let b: Simd = b.as_f64x8();
8563 let r: Simd = vdivpd(a, b, ROUNDING);
8564 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8565 }
8566}
8567
8568/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8569///
8570/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8571/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8572/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8573/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8574/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8575/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8576///
8577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8578#[inline]
8579#[target_feature(enable = "avx512f")]
8580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8581#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8582#[rustc_legacy_const_generics(3)]
8583pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8584 k: __mmask8,
8585 a: __m512d,
8586 b: __m512d,
8587) -> __m512d {
8588 unsafe {
8589 static_assert_rounding!(ROUNDING);
8590 let a: Simd = a.as_f64x8();
8591 let b: Simd = b.as_f64x8();
8592 let r: Simd = vdivpd(a, b, ROUNDING);
8593 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8594 }
8595}
8596
8597/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8598///
8599/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8600/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8601/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8602/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8603/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8604/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8605///
8606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8607#[inline]
8608#[target_feature(enable = "avx512f")]
8609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8610#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8611#[rustc_legacy_const_generics(1)]
8612pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8613 unsafe {
8614 static_assert_rounding!(ROUNDING);
8615 let a: Simd = a.as_f32x16();
8616 let r: Simd = vsqrtps(a, ROUNDING);
8617 transmute(src:r)
8618 }
8619}
8620
8621/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8622///
8623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8624/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8625/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8626/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8627/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8628/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8629///
8630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8631#[inline]
8632#[target_feature(enable = "avx512f")]
8633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8634#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8635#[rustc_legacy_const_generics(3)]
8636pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8637 src: __m512,
8638 k: __mmask16,
8639 a: __m512,
8640) -> __m512 {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
8643 let a: Simd = a.as_f32x16();
8644 let r: Simd = vsqrtps(a, ROUNDING);
8645 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
8646 }
8647}
8648
8649/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8650///
8651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8652/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8653/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8654/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8655/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8656/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8657///
8658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8659#[inline]
8660#[target_feature(enable = "avx512f")]
8661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8662#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8663#[rustc_legacy_const_generics(2)]
8664pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8665 unsafe {
8666 static_assert_rounding!(ROUNDING);
8667 let a: Simd = a.as_f32x16();
8668 let r: Simd = vsqrtps(a, ROUNDING);
8669 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
8670 }
8671}
8672
8673/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8674///
8675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8681///
8682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8683#[inline]
8684#[target_feature(enable = "avx512f")]
8685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8686#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8687#[rustc_legacy_const_generics(1)]
8688pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8689 unsafe {
8690 static_assert_rounding!(ROUNDING);
8691 let a: Simd = a.as_f64x8();
8692 let r: Simd = vsqrtpd(a, ROUNDING);
8693 transmute(src:r)
8694 }
8695}
8696
8697/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8698///
8699/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8707#[inline]
8708#[target_feature(enable = "avx512f")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8711#[rustc_legacy_const_generics(3)]
8712pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8713 src: __m512d,
8714 k: __mmask8,
8715 a: __m512d,
8716) -> __m512d {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
8719 let a: Simd = a.as_f64x8();
8720 let r: Simd = vsqrtpd(a, ROUNDING);
8721 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
8722 }
8723}
8724
8725/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8726///
8727/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8728/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8729/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8730/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8731/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8732/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8735#[inline]
8736#[target_feature(enable = "avx512f")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8739#[rustc_legacy_const_generics(2)]
8740pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8741 unsafe {
8742 static_assert_rounding!(ROUNDING);
8743 let a: Simd = a.as_f64x8();
8744 let r: Simd = vsqrtpd(a, ROUNDING);
8745 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
8746 }
8747}
8748
8749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8750///
8751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8757///
8758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
8759#[inline]
8760#[target_feature(enable = "avx512f")]
8761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8762#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8763#[rustc_legacy_const_generics(3)]
8764pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8765 unsafe {
8766 static_assert_rounding!(ROUNDING);
8767 vfmadd132psround(a, b, c, ROUNDING)
8768 }
8769}
8770
8771/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8772///
8773/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8774/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8775/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8776/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8777/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8778/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8779///
8780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8781#[inline]
8782#[target_feature(enable = "avx512f")]
8783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8784#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8785#[rustc_legacy_const_generics(4)]
8786pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8787 a: __m512,
8788 k: __mmask16,
8789 b: __m512,
8790 c: __m512,
8791) -> __m512 {
8792 unsafe {
8793 static_assert_rounding!(ROUNDING);
8794 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:a)
8795 }
8796}
8797
8798/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in a using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8799///
8800/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8801/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8802/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8803/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8804/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8805/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8806///
8807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8808#[inline]
8809#[target_feature(enable = "avx512f")]
8810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8811#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8812#[rustc_legacy_const_generics(4)]
8813pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8814 k: __mmask16,
8815 a: __m512,
8816 b: __m512,
8817 c: __m512,
8818) -> __m512 {
8819 unsafe {
8820 static_assert_rounding!(ROUNDING);
8821 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:_mm512_setzero_ps())
8822 }
8823}
8824
8825/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8826///
8827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8833///
8834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8835#[inline]
8836#[target_feature(enable = "avx512f")]
8837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8839#[rustc_legacy_const_generics(4)]
8840pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8841 a: __m512,
8842 b: __m512,
8843 c: __m512,
8844 k: __mmask16,
8845) -> __m512 {
8846 unsafe {
8847 static_assert_rounding!(ROUNDING);
8848 simd_select_bitmask(m:k, yes:vfmadd132psround(a, b, c, ROUNDING), no:c)
8849 }
8850}
8851
8852/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8853///
8854/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8855/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8856/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8857/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8858/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8859/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8860///
8861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8862#[inline]
8863#[target_feature(enable = "avx512f")]
8864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8865#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8866#[rustc_legacy_const_generics(3)]
8867pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8868 unsafe {
8869 static_assert_rounding!(ROUNDING);
8870 vfmadd132pdround(a, b, c, ROUNDING)
8871 }
8872}
8873
8874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8875///
8876/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8877/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8878/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8879/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8880/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8881/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8882///
8883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8884#[inline]
8885#[target_feature(enable = "avx512f")]
8886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8887#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8888#[rustc_legacy_const_generics(4)]
8889pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8890 a: __m512d,
8891 k: __mmask8,
8892 b: __m512d,
8893 c: __m512d,
8894) -> __m512d {
8895 unsafe {
8896 static_assert_rounding!(ROUNDING);
8897 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:a)
8898 }
8899}
8900
8901/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8902///
8903/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8909///
8910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8911#[inline]
8912#[target_feature(enable = "avx512f")]
8913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8914#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8915#[rustc_legacy_const_generics(4)]
8916pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8917 k: __mmask8,
8918 a: __m512d,
8919 b: __m512d,
8920 c: __m512d,
8921) -> __m512d {
8922 unsafe {
8923 static_assert_rounding!(ROUNDING);
8924 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:_mm512_setzero_pd())
8925 }
8926}
8927
8928/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8929///
8930/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8931/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8932/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8933/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8934/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8935/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8936///
8937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8938#[inline]
8939#[target_feature(enable = "avx512f")]
8940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8941#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8942#[rustc_legacy_const_generics(4)]
8943pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8944 a: __m512d,
8945 b: __m512d,
8946 c: __m512d,
8947 k: __mmask8,
8948) -> __m512d {
8949 unsafe {
8950 static_assert_rounding!(ROUNDING);
8951 simd_select_bitmask(m:k, yes:vfmadd132pdround(a, b, c, ROUNDING), no:c)
8952 }
8953}
8954
8955/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8956///
8957/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8958/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8959/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8960/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8961/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8962/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8963///
8964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8965#[inline]
8966#[target_feature(enable = "avx512f")]
8967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8968#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8969#[rustc_legacy_const_generics(3)]
8970pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8971 unsafe {
8972 static_assert_rounding!(ROUNDING);
8973 vfmadd132psround(a, b, c:simd_neg(c), ROUNDING)
8974 }
8975}
8976
8977/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8978///
8979/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8980/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8981/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8982/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8983/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8984/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8985///
8986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8987#[inline]
8988#[target_feature(enable = "avx512f")]
8989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8990#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8991#[rustc_legacy_const_generics(4)]
8992pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8993 a: __m512,
8994 k: __mmask16,
8995 b: __m512,
8996 c: __m512,
8997) -> __m512 {
8998 unsafe {
8999 static_assert_rounding!(ROUNDING);
9000 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9001 simd_select_bitmask(m:k, yes:r, no:a)
9002 }
9003}
9004
9005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9006///
9007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9013///
9014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
9015#[inline]
9016#[target_feature(enable = "avx512f")]
9017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9019#[rustc_legacy_const_generics(4)]
9020pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
9021 k: __mmask16,
9022 a: __m512,
9023 b: __m512,
9024 c: __m512,
9025) -> __m512 {
9026 unsafe {
9027 static_assert_rounding!(ROUNDING);
9028 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9029 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9030 }
9031}
9032
9033/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9034///
9035/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9036/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9037/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9038/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9039/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9041///
9042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
9043#[inline]
9044#[target_feature(enable = "avx512f")]
9045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9046#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
9047#[rustc_legacy_const_generics(4)]
9048pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
9049 a: __m512,
9050 b: __m512,
9051 c: __m512,
9052 k: __mmask16,
9053) -> __m512 {
9054 unsafe {
9055 static_assert_rounding!(ROUNDING);
9056 let r: __m512 = vfmadd132psround(a, b, c:simd_neg(c), ROUNDING);
9057 simd_select_bitmask(m:k, yes:r, no:c)
9058 }
9059}
9060
9061/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
9062///
9063/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9064/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9065/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9066/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9067/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9068/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9069///
9070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
9071#[inline]
9072#[target_feature(enable = "avx512f")]
9073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9074#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9075#[rustc_legacy_const_generics(3)]
9076pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9077 unsafe {
9078 static_assert_rounding!(ROUNDING);
9079 vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING)
9080 }
9081}
9082
9083/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9084///
9085/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9091///
9092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
9093#[inline]
9094#[target_feature(enable = "avx512f")]
9095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9096#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9097#[rustc_legacy_const_generics(4)]
9098pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
9099 a: __m512d,
9100 k: __mmask8,
9101 b: __m512d,
9102 c: __m512d,
9103) -> __m512d {
9104 unsafe {
9105 static_assert_rounding!(ROUNDING);
9106 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9107 simd_select_bitmask(m:k, yes:r, no:a)
9108 }
9109}
9110
9111/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9112///
9113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9119///
9120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
9121#[inline]
9122#[target_feature(enable = "avx512f")]
9123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9124#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9125#[rustc_legacy_const_generics(4)]
9126pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
9127 k: __mmask8,
9128 a: __m512d,
9129 b: __m512d,
9130 c: __m512d,
9131) -> __m512d {
9132 unsafe {
9133 static_assert_rounding!(ROUNDING);
9134 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9135 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9136 }
9137}
9138
9139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9140///
9141/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9142/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9143/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9144/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9145/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9147///
9148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
9149#[inline]
9150#[target_feature(enable = "avx512f")]
9151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9152#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
9153#[rustc_legacy_const_generics(4)]
9154pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
9155 a: __m512d,
9156 b: __m512d,
9157 c: __m512d,
9158 k: __mmask8,
9159) -> __m512d {
9160 unsafe {
9161 static_assert_rounding!(ROUNDING);
9162 let r: __m512d = vfmadd132pdround(a, b, c:simd_neg(c), ROUNDING);
9163 simd_select_bitmask(m:k, yes:r, no:c)
9164 }
9165}
9166
9167/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
9168///
9169/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9170/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9171/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9172/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9173/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9174/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9175///
9176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
9177#[inline]
9178#[target_feature(enable = "avx512f")]
9179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9180#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9181#[rustc_legacy_const_generics(3)]
9182pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9183 unsafe {
9184 static_assert_rounding!(ROUNDING);
9185 vfmaddsubpsround(a, b, c, ROUNDING)
9186 }
9187}
9188
9189/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9190///
9191/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9192/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9193/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9194/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9195/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9196/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9197///
9198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
9199#[inline]
9200#[target_feature(enable = "avx512f")]
9201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9202#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9203#[rustc_legacy_const_generics(4)]
9204pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
9205 a: __m512,
9206 k: __mmask16,
9207 b: __m512,
9208 c: __m512,
9209) -> __m512 {
9210 unsafe {
9211 static_assert_rounding!(ROUNDING);
9212 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:a)
9213 }
9214}
9215
9216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9217///
9218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9224///
9225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
9226#[inline]
9227#[target_feature(enable = "avx512f")]
9228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9229#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9230#[rustc_legacy_const_generics(4)]
9231pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
9232 k: __mmask16,
9233 a: __m512,
9234 b: __m512,
9235 c: __m512,
9236) -> __m512 {
9237 unsafe {
9238 static_assert_rounding!(ROUNDING);
9239 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:_mm512_setzero_ps())
9240 }
9241}
9242
9243/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9244///
9245/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9246/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9247/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9248/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9249/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9250/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9251///
9252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
9253#[inline]
9254#[target_feature(enable = "avx512f")]
9255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9256#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
9257#[rustc_legacy_const_generics(4)]
9258pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
9259 a: __m512,
9260 b: __m512,
9261 c: __m512,
9262 k: __mmask16,
9263) -> __m512 {
9264 unsafe {
9265 static_assert_rounding!(ROUNDING);
9266 simd_select_bitmask(m:k, yes:vfmaddsubpsround(a, b, c, ROUNDING), no:c)
9267 }
9268}
9269
9270/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
9271///
9272/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9278///
9279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
9280#[inline]
9281#[target_feature(enable = "avx512f")]
9282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9283#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9284#[rustc_legacy_const_generics(3)]
9285pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9286 a: __m512d,
9287 b: __m512d,
9288 c: __m512d,
9289) -> __m512d {
9290 unsafe {
9291 static_assert_rounding!(ROUNDING);
9292 vfmaddsubpdround(a, b, c, ROUNDING)
9293 }
9294}
9295
9296/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9297///
9298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9304///
9305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9306#[inline]
9307#[target_feature(enable = "avx512f")]
9308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9309#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9310#[rustc_legacy_const_generics(4)]
9311pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9312 a: __m512d,
9313 k: __mmask8,
9314 b: __m512d,
9315 c: __m512d,
9316) -> __m512d {
9317 unsafe {
9318 static_assert_rounding!(ROUNDING);
9319 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:a)
9320 }
9321}
9322
9323/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9324///
9325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9331///
9332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9333#[inline]
9334#[target_feature(enable = "avx512f")]
9335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9336#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9337#[rustc_legacy_const_generics(4)]
9338pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9339 k: __mmask8,
9340 a: __m512d,
9341 b: __m512d,
9342 c: __m512d,
9343) -> __m512d {
9344 unsafe {
9345 static_assert_rounding!(ROUNDING);
9346 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:_mm512_setzero_pd())
9347 }
9348}
9349
9350/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9351///
9352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9360#[inline]
9361#[target_feature(enable = "avx512f")]
9362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9363#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9364#[rustc_legacy_const_generics(4)]
9365pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9366 a: __m512d,
9367 b: __m512d,
9368 c: __m512d,
9369 k: __mmask8,
9370) -> __m512d {
9371 unsafe {
9372 static_assert_rounding!(ROUNDING);
9373 simd_select_bitmask(m:k, yes:vfmaddsubpdround(a, b, c, ROUNDING), no:c)
9374 }
9375}
9376
9377/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9378///
9379/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9380/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9381/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9382/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9383/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9384/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9385///
9386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9387#[inline]
9388#[target_feature(enable = "avx512f")]
9389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9390#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9391#[rustc_legacy_const_generics(3)]
9392pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9393 unsafe {
9394 static_assert_rounding!(ROUNDING);
9395 vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING)
9396 }
9397}
9398
9399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9400///
9401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9407///
9408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9409#[inline]
9410#[target_feature(enable = "avx512f")]
9411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9412#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9413#[rustc_legacy_const_generics(4)]
9414pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9415 a: __m512,
9416 k: __mmask16,
9417 b: __m512,
9418 c: __m512,
9419) -> __m512 {
9420 unsafe {
9421 static_assert_rounding!(ROUNDING);
9422 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9423 simd_select_bitmask(m:k, yes:r, no:a)
9424 }
9425}
9426
9427/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9428///
9429/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9430/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9431/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9432/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9433/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9434/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9435///
9436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9437#[inline]
9438#[target_feature(enable = "avx512f")]
9439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9440#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9441#[rustc_legacy_const_generics(4)]
9442pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9443 k: __mmask16,
9444 a: __m512,
9445 b: __m512,
9446 c: __m512,
9447) -> __m512 {
9448 unsafe {
9449 static_assert_rounding!(ROUNDING);
9450 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9451 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9452 }
9453}
9454
9455/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9456///
9457/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9458/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9459/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9460/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9461/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9462/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9463///
9464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9465#[inline]
9466#[target_feature(enable = "avx512f")]
9467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9468#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9469#[rustc_legacy_const_generics(4)]
9470pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9471 a: __m512,
9472 b: __m512,
9473 c: __m512,
9474 k: __mmask16,
9475) -> __m512 {
9476 unsafe {
9477 static_assert_rounding!(ROUNDING);
9478 let r: __m512 = vfmaddsubpsround(a, b, c:simd_neg(c), ROUNDING);
9479 simd_select_bitmask(m:k, yes:r, no:c)
9480 }
9481}
9482
9483/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9484///
9485/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9486/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9487/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9488/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9489/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9490/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9491///
9492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9493#[inline]
9494#[target_feature(enable = "avx512f")]
9495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9496#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9497#[rustc_legacy_const_generics(3)]
9498pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9499 a: __m512d,
9500 b: __m512d,
9501 c: __m512d,
9502) -> __m512d {
9503 unsafe {
9504 static_assert_rounding!(ROUNDING);
9505 vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING)
9506 }
9507}
9508
9509/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9510///
9511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9517///
9518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9519#[inline]
9520#[target_feature(enable = "avx512f")]
9521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9522#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9523#[rustc_legacy_const_generics(4)]
9524pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9525 a: __m512d,
9526 k: __mmask8,
9527 b: __m512d,
9528 c: __m512d,
9529) -> __m512d {
9530 unsafe {
9531 static_assert_rounding!(ROUNDING);
9532 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9533 simd_select_bitmask(m:k, yes:r, no:a)
9534 }
9535}
9536
9537/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9538///
9539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9545///
9546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9547#[inline]
9548#[target_feature(enable = "avx512f")]
9549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9550#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9551#[rustc_legacy_const_generics(4)]
9552pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9553 k: __mmask8,
9554 a: __m512d,
9555 b: __m512d,
9556 c: __m512d,
9557) -> __m512d {
9558 unsafe {
9559 static_assert_rounding!(ROUNDING);
9560 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9561 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9562 }
9563}
9564
9565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9566///
9567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9575#[inline]
9576#[target_feature(enable = "avx512f")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9579#[rustc_legacy_const_generics(4)]
9580pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9581 a: __m512d,
9582 b: __m512d,
9583 c: __m512d,
9584 k: __mmask8,
9585) -> __m512d {
9586 unsafe {
9587 static_assert_rounding!(ROUNDING);
9588 let r: __m512d = vfmaddsubpdround(a, b, c:simd_neg(c), ROUNDING);
9589 simd_select_bitmask(m:k, yes:r, no:c)
9590 }
9591}
9592
9593/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9594///
9595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9601///
9602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9603#[inline]
9604#[target_feature(enable = "avx512f")]
9605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9606#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9607#[rustc_legacy_const_generics(3)]
9608pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9609 unsafe {
9610 static_assert_rounding!(ROUNDING);
9611 vfmadd132psround(a:simd_neg(a), b, c, ROUNDING)
9612 }
9613}
9614
9615/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9628#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9629#[rustc_legacy_const_generics(4)]
9630pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9631 a: __m512,
9632 k: __mmask16,
9633 b: __m512,
9634 c: __m512,
9635) -> __m512 {
9636 unsafe {
9637 static_assert_rounding!(ROUNDING);
9638 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9639 simd_select_bitmask(m:k, yes:r, no:a)
9640 }
9641}
9642
9643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9644///
9645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9653#[inline]
9654#[target_feature(enable = "avx512f")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9657#[rustc_legacy_const_generics(4)]
9658pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9659 k: __mmask16,
9660 a: __m512,
9661 b: __m512,
9662 c: __m512,
9663) -> __m512 {
9664 unsafe {
9665 static_assert_rounding!(ROUNDING);
9666 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9667 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9668 }
9669}
9670
9671/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9672///
9673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9674/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9675/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9676/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9677/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9678/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9679///
9680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9681#[inline]
9682#[target_feature(enable = "avx512f")]
9683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9684#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9685#[rustc_legacy_const_generics(4)]
9686pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9687 a: __m512,
9688 b: __m512,
9689 c: __m512,
9690 k: __mmask16,
9691) -> __m512 {
9692 unsafe {
9693 static_assert_rounding!(ROUNDING);
9694 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c, ROUNDING);
9695 simd_select_bitmask(m:k, yes:r, no:c)
9696 }
9697}
9698
9699/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9700///
9701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9707///
9708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9709#[inline]
9710#[target_feature(enable = "avx512f")]
9711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9712#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9713#[rustc_legacy_const_generics(3)]
9714pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9715 unsafe {
9716 static_assert_rounding!(ROUNDING);
9717 vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING)
9718 }
9719}
9720
9721/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9722///
9723/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9724/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9725/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9726/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9727/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9728/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9729///
9730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9731#[inline]
9732#[target_feature(enable = "avx512f")]
9733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9734#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9735#[rustc_legacy_const_generics(4)]
9736pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9737 a: __m512d,
9738 k: __mmask8,
9739 b: __m512d,
9740 c: __m512d,
9741) -> __m512d {
9742 unsafe {
9743 static_assert_rounding!(ROUNDING);
9744 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9745 simd_select_bitmask(m:k, yes:r, no:a)
9746 }
9747}
9748
9749/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9750///
9751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9765 k: __mmask8,
9766 a: __m512d,
9767 b: __m512d,
9768 c: __m512d,
9769) -> __m512d {
9770 unsafe {
9771 static_assert_rounding!(ROUNDING);
9772 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9773 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9774 }
9775}
9776
9777/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9778///
9779/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9780/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9781/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9782/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9783/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9784/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9785///
9786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9787#[inline]
9788#[target_feature(enable = "avx512f")]
9789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9790#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9791#[rustc_legacy_const_generics(4)]
9792pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9793 a: __m512d,
9794 b: __m512d,
9795 c: __m512d,
9796 k: __mmask8,
9797) -> __m512d {
9798 unsafe {
9799 static_assert_rounding!(ROUNDING);
9800 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c, ROUNDING);
9801 simd_select_bitmask(m:k, yes:r, no:c)
9802 }
9803}
9804
9805/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9806///
9807/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9808/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9809/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9810/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9811/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9812/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9813///
9814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9815#[inline]
9816#[target_feature(enable = "avx512f")]
9817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9818#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9819#[rustc_legacy_const_generics(3)]
9820pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9821 unsafe {
9822 static_assert_rounding!(ROUNDING);
9823 vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
9824 }
9825}
9826
9827/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9828///
9829/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9830/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9831/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9832/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9833/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9834/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9835///
9836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9837#[inline]
9838#[target_feature(enable = "avx512f")]
9839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9840#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9841#[rustc_legacy_const_generics(4)]
9842pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9843 a: __m512,
9844 k: __mmask16,
9845 b: __m512,
9846 c: __m512,
9847) -> __m512 {
9848 unsafe {
9849 static_assert_rounding!(ROUNDING);
9850 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9851 simd_select_bitmask(m:k, yes:r, no:a)
9852 }
9853}
9854
9855/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9856///
9857/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9858/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9859/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9860/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9861/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9862/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9863///
9864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9865#[inline]
9866#[target_feature(enable = "avx512f")]
9867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9868#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9869#[rustc_legacy_const_generics(4)]
9870pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9871 k: __mmask16,
9872 a: __m512,
9873 b: __m512,
9874 c: __m512,
9875) -> __m512 {
9876 unsafe {
9877 static_assert_rounding!(ROUNDING);
9878 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9879 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_ps())
9880 }
9881}
9882
9883/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9884///
9885/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9886/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9887/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9888/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9889/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9890/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9891///
9892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9893#[inline]
9894#[target_feature(enable = "avx512f")]
9895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9896#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9897#[rustc_legacy_const_generics(4)]
9898pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9899 a: __m512,
9900 b: __m512,
9901 c: __m512,
9902 k: __mmask16,
9903) -> __m512 {
9904 unsafe {
9905 static_assert_rounding!(ROUNDING);
9906 let r: __m512 = vfmadd132psround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9907 simd_select_bitmask(m:k, yes:r, no:c)
9908 }
9909}
9910
9911/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9912///
9913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9919///
9920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9921#[inline]
9922#[target_feature(enable = "avx512f")]
9923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9924#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9925#[rustc_legacy_const_generics(3)]
9926pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9927 unsafe {
9928 static_assert_rounding!(ROUNDING);
9929 vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING)
9930 }
9931}
9932
9933/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9934///
9935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9941///
9942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9943#[inline]
9944#[target_feature(enable = "avx512f")]
9945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9946#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9947#[rustc_legacy_const_generics(4)]
9948pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9949 a: __m512d,
9950 k: __mmask8,
9951 b: __m512d,
9952 c: __m512d,
9953) -> __m512d {
9954 unsafe {
9955 static_assert_rounding!(ROUNDING);
9956 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9957 simd_select_bitmask(m:k, yes:r, no:a)
9958 }
9959}
9960
9961/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9962///
9963/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9964/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9965/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9966/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9967/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9968/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9969///
9970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9971#[inline]
9972#[target_feature(enable = "avx512f")]
9973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9974#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9975#[rustc_legacy_const_generics(4)]
9976pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9977 k: __mmask8,
9978 a: __m512d,
9979 b: __m512d,
9980 c: __m512d,
9981) -> __m512d {
9982 unsafe {
9983 static_assert_rounding!(ROUNDING);
9984 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
9985 simd_select_bitmask(m:k, yes:r, no:_mm512_setzero_pd())
9986 }
9987}
9988
9989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9990///
9991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9997///
9998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9999#[inline]
10000#[target_feature(enable = "avx512f")]
10001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10002#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
10003#[rustc_legacy_const_generics(4)]
10004pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
10005 a: __m512d,
10006 b: __m512d,
10007 c: __m512d,
10008 k: __mmask8,
10009) -> __m512d {
10010 unsafe {
10011 static_assert_rounding!(ROUNDING);
10012 let r: __m512d = vfmadd132pdround(a:simd_neg(a), b, c:simd_neg(c), ROUNDING);
10013 simd_select_bitmask(m:k, yes:r, no:c)
10014 }
10015}
10016
10017/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10018/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10019///
10020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
10021#[inline]
10022#[target_feature(enable = "avx512f")]
10023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10024#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10025#[rustc_legacy_const_generics(2)]
10026pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10027 unsafe {
10028 static_assert_sae!(SAE);
10029 let a: Simd = a.as_f32x16();
10030 let b: Simd = b.as_f32x16();
10031 let r: Simd = vmaxps(a, b, SAE);
10032 transmute(src:r)
10033 }
10034}
10035
10036/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10037/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10038///
10039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
10040#[inline]
10041#[target_feature(enable = "avx512f")]
10042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10043#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10044#[rustc_legacy_const_generics(4)]
10045pub fn _mm512_mask_max_round_ps<const SAE: i32>(
10046 src: __m512,
10047 k: __mmask16,
10048 a: __m512,
10049 b: __m512,
10050) -> __m512 {
10051 unsafe {
10052 static_assert_sae!(SAE);
10053 let a: Simd = a.as_f32x16();
10054 let b: Simd = b.as_f32x16();
10055 let r: Simd = vmaxps(a, b, SAE);
10056 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
10057 }
10058}
10059
10060/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10062///
10063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
10064#[inline]
10065#[target_feature(enable = "avx512f")]
10066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10067#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
10068#[rustc_legacy_const_generics(3)]
10069pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10070 unsafe {
10071 static_assert_sae!(SAE);
10072 let a: Simd = a.as_f32x16();
10073 let b: Simd = b.as_f32x16();
10074 let r: Simd = vmaxps(a, b, SAE);
10075 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
10076 }
10077}
10078
10079/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
10080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10081///
10082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
10083#[inline]
10084#[target_feature(enable = "avx512f")]
10085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10086#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10087#[rustc_legacy_const_generics(2)]
10088pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10089 unsafe {
10090 static_assert_sae!(SAE);
10091 let a: Simd = a.as_f64x8();
10092 let b: Simd = b.as_f64x8();
10093 let r: Simd = vmaxpd(a, b, SAE);
10094 transmute(src:r)
10095 }
10096}
10097
10098/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10100///
10101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
10102#[inline]
10103#[target_feature(enable = "avx512f")]
10104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10105#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10106#[rustc_legacy_const_generics(4)]
10107pub fn _mm512_mask_max_round_pd<const SAE: i32>(
10108 src: __m512d,
10109 k: __mmask8,
10110 a: __m512d,
10111 b: __m512d,
10112) -> __m512d {
10113 unsafe {
10114 static_assert_sae!(SAE);
10115 let a: Simd = a.as_f64x8();
10116 let b: Simd = b.as_f64x8();
10117 let r: Simd = vmaxpd(a, b, SAE);
10118 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
10119 }
10120}
10121
10122/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10123/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10124///
10125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
10126#[inline]
10127#[target_feature(enable = "avx512f")]
10128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10129#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
10130#[rustc_legacy_const_generics(3)]
10131pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10132 unsafe {
10133 static_assert_sae!(SAE);
10134 let a: Simd = a.as_f64x8();
10135 let b: Simd = b.as_f64x8();
10136 let r: Simd = vmaxpd(a, b, SAE);
10137 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
10138 }
10139}
10140
10141/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10142/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10143///
10144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
10145#[inline]
10146#[target_feature(enable = "avx512f")]
10147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10148#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10149#[rustc_legacy_const_generics(2)]
10150pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
10151 unsafe {
10152 static_assert_sae!(SAE);
10153 let a: Simd = a.as_f32x16();
10154 let b: Simd = b.as_f32x16();
10155 let r: Simd = vminps(a, b, SAE);
10156 transmute(src:r)
10157 }
10158}
10159
10160/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162///
10163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
10164#[inline]
10165#[target_feature(enable = "avx512f")]
10166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10167#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10168#[rustc_legacy_const_generics(4)]
10169pub fn _mm512_mask_min_round_ps<const SAE: i32>(
10170 src: __m512,
10171 k: __mmask16,
10172 a: __m512,
10173 b: __m512,
10174) -> __m512 {
10175 unsafe {
10176 static_assert_sae!(SAE);
10177 let a: Simd = a.as_f32x16();
10178 let b: Simd = b.as_f32x16();
10179 let r: Simd = vminps(a, b, SAE);
10180 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
10181 }
10182}
10183
10184/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10186///
10187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
10188#[inline]
10189#[target_feature(enable = "avx512f")]
10190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10191#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
10192#[rustc_legacy_const_generics(3)]
10193pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
10194 unsafe {
10195 static_assert_sae!(SAE);
10196 let a: Simd = a.as_f32x16();
10197 let b: Simd = b.as_f32x16();
10198 let r: Simd = vminps(a, b, SAE);
10199 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
10200 }
10201}
10202
10203/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
10204/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10205///
10206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
10207#[inline]
10208#[target_feature(enable = "avx512f")]
10209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10210#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10211#[rustc_legacy_const_generics(2)]
10212pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
10213 unsafe {
10214 static_assert_sae!(SAE);
10215 let a: Simd = a.as_f64x8();
10216 let b: Simd = b.as_f64x8();
10217 let r: Simd = vminpd(a, b, SAE);
10218 transmute(src:r)
10219 }
10220}
10221
10222/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10223/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10224///
10225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
10226#[inline]
10227#[target_feature(enable = "avx512f")]
10228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10229#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10230#[rustc_legacy_const_generics(4)]
10231pub fn _mm512_mask_min_round_pd<const SAE: i32>(
10232 src: __m512d,
10233 k: __mmask8,
10234 a: __m512d,
10235 b: __m512d,
10236) -> __m512d {
10237 unsafe {
10238 static_assert_sae!(SAE);
10239 let a: Simd = a.as_f64x8();
10240 let b: Simd = b.as_f64x8();
10241 let r: Simd = vminpd(a, b, SAE);
10242 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
10243 }
10244}
10245
10246/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10247/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10248///
10249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
10250#[inline]
10251#[target_feature(enable = "avx512f")]
10252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10253#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
10254#[rustc_legacy_const_generics(3)]
10255pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
10256 unsafe {
10257 static_assert_sae!(SAE);
10258 let a: Simd = a.as_f64x8();
10259 let b: Simd = b.as_f64x8();
10260 let r: Simd = vminpd(a, b, SAE);
10261 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
10262 }
10263}
10264
10265/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10266/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10267///
10268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
10269#[inline]
10270#[target_feature(enable = "avx512f")]
10271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10272#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10273#[rustc_legacy_const_generics(1)]
10274pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
10275 unsafe {
10276 static_assert_sae!(SAE);
10277 let a: Simd = a.as_f32x16();
10278 let r: Simd = vgetexpps(a, src:f32x16::ZERO, m:0b11111111_11111111, SAE);
10279 transmute(src:r)
10280 }
10281}
10282
10283/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10284/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10285///
10286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10287#[inline]
10288#[target_feature(enable = "avx512f")]
10289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10290#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10291#[rustc_legacy_const_generics(3)]
10292pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10293 unsafe {
10294 static_assert_sae!(SAE);
10295 let a: Simd = a.as_f32x16();
10296 let src: Simd = src.as_f32x16();
10297 let r: Simd = vgetexpps(a, src, m:k, SAE);
10298 transmute(src:r)
10299 }
10300}
10301
10302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10303/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10304///
10305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10306#[inline]
10307#[target_feature(enable = "avx512f")]
10308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10309#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10310#[rustc_legacy_const_generics(2)]
10311pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10312 unsafe {
10313 static_assert_sae!(SAE);
10314 let a: Simd = a.as_f32x16();
10315 let r: Simd = vgetexpps(a, src:f32x16::ZERO, m:k, SAE);
10316 transmute(src:r)
10317 }
10318}
10319
10320/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10322///
10323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10324#[inline]
10325#[target_feature(enable = "avx512f")]
10326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10327#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10328#[rustc_legacy_const_generics(1)]
10329pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10330 unsafe {
10331 static_assert_sae!(SAE);
10332 let a: Simd = a.as_f64x8();
10333 let r: Simd = vgetexppd(a, src:f64x8::ZERO, m:0b11111111, SAE);
10334 transmute(src:r)
10335 }
10336}
10337
10338/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10339/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10340///
10341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10342#[inline]
10343#[target_feature(enable = "avx512f")]
10344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10345#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10346#[rustc_legacy_const_generics(3)]
10347pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10348 src: __m512d,
10349 k: __mmask8,
10350 a: __m512d,
10351) -> __m512d {
10352 unsafe {
10353 static_assert_sae!(SAE);
10354 let a: Simd = a.as_f64x8();
10355 let src: Simd = src.as_f64x8();
10356 let r: Simd = vgetexppd(a, src, m:k, SAE);
10357 transmute(src:r)
10358 }
10359}
10360
10361/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10362/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10363///
10364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10365#[inline]
10366#[target_feature(enable = "avx512f")]
10367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10368#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10369#[rustc_legacy_const_generics(2)]
10370pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10371 unsafe {
10372 static_assert_sae!(SAE);
10373 let a: Simd = a.as_f64x8();
10374 let r: Simd = vgetexppd(a, src:f64x8::ZERO, m:k, SAE);
10375 transmute(src:r)
10376 }
10377}
10378
10379/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10380/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10381/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10382/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10383/// * [`_MM_FROUND_TO_POS_INF`] : round up
10384/// * [`_MM_FROUND_TO_ZERO`] : truncate
10385/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10386///
10387/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10389#[inline]
10390#[target_feature(enable = "avx512f")]
10391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10392#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10393#[rustc_legacy_const_generics(1, 2)]
10394pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10395 unsafe {
10396 static_assert_uimm_bits!(IMM8, 8);
10397 static_assert_mantissas_sae!(SAE);
10398 let a: Simd = a.as_f32x16();
10399 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
10400 transmute(src:r)
10401 }
10402}
10403
10404/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10405/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10406/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10407/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10408/// * [`_MM_FROUND_TO_POS_INF`] : round up
10409/// * [`_MM_FROUND_TO_ZERO`] : truncate
10410/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10411///
10412/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10414#[inline]
10415#[target_feature(enable = "avx512f")]
10416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10417#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10418#[rustc_legacy_const_generics(3, 4)]
10419pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10420 src: __m512,
10421 k: __mmask16,
10422 a: __m512,
10423) -> __m512 {
10424 unsafe {
10425 static_assert_uimm_bits!(IMM8, 8);
10426 static_assert_mantissas_sae!(SAE);
10427 let a: Simd = a.as_f32x16();
10428 let src: Simd = src.as_f32x16();
10429 let r: Simd = vrndscaleps(a, IMM8, src, mask:k, SAE);
10430 transmute(src:r)
10431 }
10432}
10433
10434/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10435/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10436/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10437/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10438/// * [`_MM_FROUND_TO_POS_INF`] : round up
10439/// * [`_MM_FROUND_TO_ZERO`] : truncate
10440/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10441///
10442/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10444#[inline]
10445#[target_feature(enable = "avx512f")]
10446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10447#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10448#[rustc_legacy_const_generics(2, 3)]
10449pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10450 k: __mmask16,
10451 a: __m512,
10452) -> __m512 {
10453 unsafe {
10454 static_assert_uimm_bits!(IMM8, 8);
10455 static_assert_mantissas_sae!(SAE);
10456 let a: Simd = a.as_f32x16();
10457 let r: Simd = vrndscaleps(a, IMM8, src:f32x16::ZERO, mask:k, SAE);
10458 transmute(src:r)
10459 }
10460}
10461
10462/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10463/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10464/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10465/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10466/// * [`_MM_FROUND_TO_POS_INF`] : round up
10467/// * [`_MM_FROUND_TO_ZERO`] : truncate
10468/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10469///
10470/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10472#[inline]
10473#[target_feature(enable = "avx512f")]
10474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10475#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10476#[rustc_legacy_const_generics(1, 2)]
10477pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10478 unsafe {
10479 static_assert_uimm_bits!(IMM8, 8);
10480 static_assert_mantissas_sae!(SAE);
10481 let a: Simd = a.as_f64x8();
10482 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:0b11111111, SAE);
10483 transmute(src:r)
10484 }
10485}
10486
10487/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10488/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10489/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10490/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10491/// * [`_MM_FROUND_TO_POS_INF`] : round up
10492/// * [`_MM_FROUND_TO_ZERO`] : truncate
10493/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10494///
10495/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10497#[inline]
10498#[target_feature(enable = "avx512f")]
10499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10500#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10501#[rustc_legacy_const_generics(3, 4)]
10502pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10503 src: __m512d,
10504 k: __mmask8,
10505 a: __m512d,
10506) -> __m512d {
10507 unsafe {
10508 static_assert_uimm_bits!(IMM8, 8);
10509 static_assert_mantissas_sae!(SAE);
10510 let a: Simd = a.as_f64x8();
10511 let src: Simd = src.as_f64x8();
10512 let r: Simd = vrndscalepd(a, IMM8, src, mask:k, SAE);
10513 transmute(src:r)
10514 }
10515}
10516
10517/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10518/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10521/// * [`_MM_FROUND_TO_POS_INF`] : round up
10522/// * [`_MM_FROUND_TO_ZERO`] : truncate
10523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10524///
10525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10527#[inline]
10528#[target_feature(enable = "avx512f")]
10529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10530#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10531#[rustc_legacy_const_generics(2, 3)]
10532pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10533 k: __mmask8,
10534 a: __m512d,
10535) -> __m512d {
10536 unsafe {
10537 static_assert_uimm_bits!(IMM8, 8);
10538 static_assert_mantissas_sae!(SAE);
10539 let a: Simd = a.as_f64x8();
10540 let r: Simd = vrndscalepd(a, IMM8, src:f64x8::ZERO, mask:k, SAE);
10541 transmute(src:r)
10542 }
10543}
10544
10545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10546///
10547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10553///
10554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10555#[inline]
10556#[target_feature(enable = "avx512f")]
10557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10558#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10559#[rustc_legacy_const_generics(2)]
10560pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10561 unsafe {
10562 static_assert_rounding!(ROUNDING);
10563 let a: Simd = a.as_f32x16();
10564 let b: Simd = b.as_f32x16();
10565 let r: Simd = vscalefps(a, b, src:f32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
10566 transmute(src:r)
10567 }
10568}
10569
10570/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10571///
10572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10578///
10579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10580#[inline]
10581#[target_feature(enable = "avx512f")]
10582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10583#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10584#[rustc_legacy_const_generics(4)]
10585pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10586 src: __m512,
10587 k: __mmask16,
10588 a: __m512,
10589 b: __m512,
10590) -> __m512 {
10591 unsafe {
10592 static_assert_rounding!(ROUNDING);
10593 let a: Simd = a.as_f32x16();
10594 let b: Simd = b.as_f32x16();
10595 let src: Simd = src.as_f32x16();
10596 let r: Simd = vscalefps(a, b, src, mask:k, ROUNDING);
10597 transmute(src:r)
10598 }
10599}
10600
10601/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10602///
10603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10609///
10610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10611#[inline]
10612#[target_feature(enable = "avx512f")]
10613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10614#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10615#[rustc_legacy_const_generics(3)]
10616pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10617 k: __mmask16,
10618 a: __m512,
10619 b: __m512,
10620) -> __m512 {
10621 unsafe {
10622 static_assert_rounding!(ROUNDING);
10623 let a: Simd = a.as_f32x16();
10624 let b: Simd = b.as_f32x16();
10625 let r: Simd = vscalefps(a, b, src:f32x16::ZERO, mask:k, ROUNDING);
10626 transmute(src:r)
10627 }
10628}
10629
10630/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10631///
10632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10638///
10639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10640#[inline]
10641#[target_feature(enable = "avx512f")]
10642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10643#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10644#[rustc_legacy_const_generics(2)]
10645pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10646 unsafe {
10647 static_assert_rounding!(ROUNDING);
10648 let a: Simd = a.as_f64x8();
10649 let b: Simd = b.as_f64x8();
10650 let r: Simd = vscalefpd(a, b, src:f64x8::ZERO, mask:0b11111111, ROUNDING);
10651 transmute(src:r)
10652 }
10653}
10654
10655/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10656///
10657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10663///
10664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10665#[inline]
10666#[target_feature(enable = "avx512f")]
10667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10668#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10669#[rustc_legacy_const_generics(4)]
10670pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10671 src: __m512d,
10672 k: __mmask8,
10673 a: __m512d,
10674 b: __m512d,
10675) -> __m512d {
10676 unsafe {
10677 static_assert_rounding!(ROUNDING);
10678 let a: Simd = a.as_f64x8();
10679 let b: Simd = b.as_f64x8();
10680 let src: Simd = src.as_f64x8();
10681 let r: Simd = vscalefpd(a, b, src, mask:k, ROUNDING);
10682 transmute(src:r)
10683 }
10684}
10685
10686/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10687///
10688/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10689/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10690/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10691/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10692/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10693/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10694///
10695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10696#[inline]
10697#[target_feature(enable = "avx512f")]
10698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10699#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10700#[rustc_legacy_const_generics(3)]
10701pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10702 k: __mmask8,
10703 a: __m512d,
10704 b: __m512d,
10705) -> __m512d {
10706 unsafe {
10707 static_assert_rounding!(ROUNDING);
10708 let a: Simd = a.as_f64x8();
10709 let b: Simd = b.as_f64x8();
10710 let r: Simd = vscalefpd(a, b, src:f64x8::ZERO, mask:k, ROUNDING);
10711 transmute(src:r)
10712 }
10713}
10714
10715/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10716///
10717/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10719#[inline]
10720#[target_feature(enable = "avx512f")]
10721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10722#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10723#[rustc_legacy_const_generics(3, 4)]
10724pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10725 a: __m512,
10726 b: __m512,
10727 c: __m512i,
10728) -> __m512 {
10729 unsafe {
10730 static_assert_uimm_bits!(IMM8, 8);
10731 static_assert_mantissas_sae!(SAE);
10732 let a: Simd = a.as_f32x16();
10733 let b: Simd = b.as_f32x16();
10734 let c: Simd = c.as_i32x16();
10735 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:0b11111111_11111111, SAE);
10736 transmute(src:r)
10737 }
10738}
10739
10740/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10741///
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10744#[inline]
10745#[target_feature(enable = "avx512f")]
10746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10747#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10748#[rustc_legacy_const_generics(4, 5)]
10749pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10750 a: __m512,
10751 k: __mmask16,
10752 b: __m512,
10753 c: __m512i,
10754) -> __m512 {
10755 unsafe {
10756 static_assert_uimm_bits!(IMM8, 8);
10757 static_assert_mantissas_sae!(SAE);
10758 let a: Simd = a.as_f32x16();
10759 let b: Simd = b.as_f32x16();
10760 let c: Simd = c.as_i32x16();
10761 let r: Simd = vfixupimmps(a, b, c, IMM8, mask:k, SAE);
10762 transmute(src:r)
10763 }
10764}
10765
10766/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10767///
10768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10770#[inline]
10771#[target_feature(enable = "avx512f")]
10772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10773#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10774#[rustc_legacy_const_generics(4, 5)]
10775pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10776 k: __mmask16,
10777 a: __m512,
10778 b: __m512,
10779 c: __m512i,
10780) -> __m512 {
10781 unsafe {
10782 static_assert_uimm_bits!(IMM8, 8);
10783 static_assert_mantissas_sae!(SAE);
10784 let a: Simd = a.as_f32x16();
10785 let b: Simd = b.as_f32x16();
10786 let c: Simd = c.as_i32x16();
10787 let r: Simd = vfixupimmpsz(a, b, c, IMM8, mask:k, SAE);
10788 transmute(src:r)
10789 }
10790}
10791
10792/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10793///
10794/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10796#[inline]
10797#[target_feature(enable = "avx512f")]
10798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10799#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10800#[rustc_legacy_const_generics(3, 4)]
10801pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10802 a: __m512d,
10803 b: __m512d,
10804 c: __m512i,
10805) -> __m512d {
10806 unsafe {
10807 static_assert_uimm_bits!(IMM8, 8);
10808 static_assert_mantissas_sae!(SAE);
10809 let a: Simd = a.as_f64x8();
10810 let b: Simd = b.as_f64x8();
10811 let c: Simd = c.as_i64x8();
10812 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:0b11111111, SAE);
10813 transmute(src:r)
10814 }
10815}
10816
10817/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10818///
10819/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10821#[inline]
10822#[target_feature(enable = "avx512f")]
10823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10824#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10825#[rustc_legacy_const_generics(4, 5)]
10826pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10827 a: __m512d,
10828 k: __mmask8,
10829 b: __m512d,
10830 c: __m512i,
10831) -> __m512d {
10832 unsafe {
10833 static_assert_uimm_bits!(IMM8, 8);
10834 static_assert_mantissas_sae!(SAE);
10835 let a: Simd = a.as_f64x8();
10836 let b: Simd = b.as_f64x8();
10837 let c: Simd = c.as_i64x8();
10838 let r: Simd = vfixupimmpd(a, b, c, IMM8, mask:k, SAE);
10839 transmute(src:r)
10840 }
10841}
10842
10843/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10844///
10845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10847#[inline]
10848#[target_feature(enable = "avx512f")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10851#[rustc_legacy_const_generics(4, 5)]
10852pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10853 k: __mmask8,
10854 a: __m512d,
10855 b: __m512d,
10856 c: __m512i,
10857) -> __m512d {
10858 unsafe {
10859 static_assert_uimm_bits!(IMM8, 8);
10860 static_assert_mantissas_sae!(SAE);
10861 let a: Simd = a.as_f64x8();
10862 let b: Simd = b.as_f64x8();
10863 let c: Simd = c.as_i64x8();
10864 let r: Simd = vfixupimmpdz(a, b, c, IMM8, mask:k, SAE);
10865 transmute(src:r)
10866 }
10867}
10868
10869/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10870/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10871/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10872/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10873/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10874/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10875/// The sign is determined by sc which can take the following values:\
10876/// _MM_MANT_SIGN_src // sign = sign(src)\
10877/// _MM_MANT_SIGN_zero // sign = 0\
10878/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10879/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10880///
10881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10882#[inline]
10883#[target_feature(enable = "avx512f")]
10884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10885#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10886#[rustc_legacy_const_generics(1, 2, 3)]
10887pub fn _mm512_getmant_round_ps<
10888 const NORM: _MM_MANTISSA_NORM_ENUM,
10889 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10890 const SAE: i32,
10891>(
10892 a: __m512,
10893) -> __m512 {
10894 unsafe {
10895 static_assert_uimm_bits!(NORM, 4);
10896 static_assert_uimm_bits!(SIGN, 2);
10897 static_assert_mantissas_sae!(SAE);
10898 let a: Simd = a.as_f32x16();
10899 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src:f32x16::ZERO, m:0b11111111_11111111, SAE);
10900 transmute(src:r)
10901 }
10902}
10903
10904/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10905/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10906/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10907/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10908/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10909/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10910/// The sign is determined by sc which can take the following values:\
10911/// _MM_MANT_SIGN_src // sign = sign(src)\
10912/// _MM_MANT_SIGN_zero // sign = 0\
10913/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10914/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10915///
10916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10917#[inline]
10918#[target_feature(enable = "avx512f")]
10919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10920#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10921#[rustc_legacy_const_generics(3, 4, 5)]
10922pub fn _mm512_mask_getmant_round_ps<
10923 const NORM: _MM_MANTISSA_NORM_ENUM,
10924 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10925 const SAE: i32,
10926>(
10927 src: __m512,
10928 k: __mmask16,
10929 a: __m512,
10930) -> __m512 {
10931 unsafe {
10932 static_assert_uimm_bits!(NORM, 4);
10933 static_assert_uimm_bits!(SIGN, 2);
10934 static_assert_mantissas_sae!(SAE);
10935 let a: Simd = a.as_f32x16();
10936 let src: Simd = src.as_f32x16();
10937 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src, m:k, SAE);
10938 transmute(src:r)
10939 }
10940}
10941
10942/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10943/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10944/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10945/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10946/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10947/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10948/// The sign is determined by sc which can take the following values:\
10949/// _MM_MANT_SIGN_src // sign = sign(src)\
10950/// _MM_MANT_SIGN_zero // sign = 0\
10951/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10952/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10959#[rustc_legacy_const_generics(2, 3, 4)]
10960pub fn _mm512_maskz_getmant_round_ps<
10961 const NORM: _MM_MANTISSA_NORM_ENUM,
10962 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10963 const SAE: i32,
10964>(
10965 k: __mmask16,
10966 a: __m512,
10967) -> __m512 {
10968 unsafe {
10969 static_assert_uimm_bits!(NORM, 4);
10970 static_assert_uimm_bits!(SIGN, 2);
10971 static_assert_mantissas_sae!(SAE);
10972 let a: Simd = a.as_f32x16();
10973 let r: Simd = vgetmantps(a, SIGN << 2 | NORM, src:f32x16::ZERO, m:k, SAE);
10974 transmute(src:r)
10975 }
10976}
10977
10978/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10980/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10981/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10982/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10983/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10984/// The sign is determined by sc which can take the following values:\
10985/// _MM_MANT_SIGN_src // sign = sign(src)\
10986/// _MM_MANT_SIGN_zero // sign = 0\
10987/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10989///
10990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10991#[inline]
10992#[target_feature(enable = "avx512f")]
10993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10994#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10995#[rustc_legacy_const_generics(1, 2, 3)]
10996pub fn _mm512_getmant_round_pd<
10997 const NORM: _MM_MANTISSA_NORM_ENUM,
10998 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10999 const SAE: i32,
11000>(
11001 a: __m512d,
11002) -> __m512d {
11003 unsafe {
11004 static_assert_uimm_bits!(NORM, 4);
11005 static_assert_uimm_bits!(SIGN, 2);
11006 static_assert_mantissas_sae!(SAE);
11007 let a: Simd = a.as_f64x8();
11008 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src:f64x8::ZERO, m:0b11111111, SAE);
11009 transmute(src:r)
11010 }
11011}
11012
11013/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11014/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11015/// _MM_MANT_NORM_1_2 // interval [1, 2)\
11016/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
11017/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
11018/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11019/// The sign is determined by sc which can take the following values:\
11020/// _MM_MANT_SIGN_src // sign = sign(src)\
11021/// _MM_MANT_SIGN_zero // sign = 0\
11022/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
11023/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
11026#[inline]
11027#[target_feature(enable = "avx512f")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11030#[rustc_legacy_const_generics(3, 4, 5)]
11031pub fn _mm512_mask_getmant_round_pd<
11032 const NORM: _MM_MANTISSA_NORM_ENUM,
11033 const SIGN: _MM_MANTISSA_SIGN_ENUM,
11034 const SAE: i32,
11035>(
11036 src: __m512d,
11037 k: __mmask8,
11038 a: __m512d,
11039) -> __m512d {
11040 unsafe {
11041 static_assert_uimm_bits!(NORM, 4);
11042 static_assert_uimm_bits!(SIGN, 2);
11043 static_assert_mantissas_sae!(SAE);
11044 let a: Simd = a.as_f64x8();
11045 let src: Simd = src.as_f64x8();
11046 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, SAE);
11047 transmute(src:r)
11048 }
11049}
11050
11051/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
11052/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
11053/// _MM_MANT_NORM_1_2 // interval [1, 2)\
11054/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
11055/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
11056/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
11057/// The sign is determined by sc which can take the following values:\
11058/// _MM_MANT_SIGN_src // sign = sign(src)\
11059/// _MM_MANT_SIGN_zero // sign = 0\
11060/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
11061/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
11062///
11063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
11064#[inline]
11065#[target_feature(enable = "avx512f")]
11066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11067#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
11068#[rustc_legacy_const_generics(2, 3, 4)]
11069pub fn _mm512_maskz_getmant_round_pd<
11070 const NORM: _MM_MANTISSA_NORM_ENUM,
11071 const SIGN: _MM_MANTISSA_SIGN_ENUM,
11072 const SAE: i32,
11073>(
11074 k: __mmask8,
11075 a: __m512d,
11076) -> __m512d {
11077 unsafe {
11078 static_assert_uimm_bits!(NORM, 4);
11079 static_assert_uimm_bits!(SIGN, 2);
11080 static_assert_mantissas_sae!(SAE);
11081 let a: Simd = a.as_f64x8();
11082 let r: Simd = vgetmantpd(a, SIGN << 2 | NORM, src:f64x8::ZERO, m:k, SAE);
11083 transmute(src:r)
11084 }
11085}
11086
11087/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11088///
11089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
11090#[inline]
11091#[target_feature(enable = "avx512f")]
11092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11093#[cfg_attr(test, assert_instr(vcvtps2dq))]
11094pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
11095 unsafe {
11096 transmute(src:vcvtps2dq(
11097 a.as_f32x16(),
11098 src:i32x16::ZERO,
11099 mask:0b11111111_11111111,
11100 _MM_FROUND_CUR_DIRECTION,
11101 ))
11102 }
11103}
11104
11105/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106///
11107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
11108#[inline]
11109#[target_feature(enable = "avx512f")]
11110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11111#[cfg_attr(test, assert_instr(vcvtps2dq))]
11112pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11113 unsafe {
11114 transmute(src:vcvtps2dq(
11115 a.as_f32x16(),
11116 src.as_i32x16(),
11117 mask:k,
11118 _MM_FROUND_CUR_DIRECTION,
11119 ))
11120 }
11121}
11122
11123/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11124///
11125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
11126#[inline]
11127#[target_feature(enable = "avx512f")]
11128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11129#[cfg_attr(test, assert_instr(vcvtps2dq))]
11130pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
11131 unsafe {
11132 transmute(src:vcvtps2dq(
11133 a.as_f32x16(),
11134 src:i32x16::ZERO,
11135 mask:k,
11136 _MM_FROUND_CUR_DIRECTION,
11137 ))
11138 }
11139}
11140
11141/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11142///
11143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
11144#[inline]
11145#[target_feature(enable = "avx512f,avx512vl")]
11146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11147#[cfg_attr(test, assert_instr(vcvtps2dq))]
11148pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11149 unsafe {
11150 let convert: __m256i = _mm256_cvtps_epi32(a);
11151 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x8(), no:src.as_i32x8()))
11152 }
11153}
11154
11155/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11156///
11157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
11158#[inline]
11159#[target_feature(enable = "avx512f,avx512vl")]
11160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11161#[cfg_attr(test, assert_instr(vcvtps2dq))]
11162pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
11163 unsafe {
11164 let convert: __m256i = _mm256_cvtps_epi32(a);
11165 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x8(), no:i32x8::ZERO))
11166 }
11167}
11168
11169/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11170///
11171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
11172#[inline]
11173#[target_feature(enable = "avx512f,avx512vl")]
11174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11175#[cfg_attr(test, assert_instr(vcvtps2dq))]
11176pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11177 unsafe {
11178 let convert: __m128i = _mm_cvtps_epi32(a);
11179 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11180 }
11181}
11182
11183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
11186#[inline]
11187#[target_feature(enable = "avx512f,avx512vl")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[cfg_attr(test, assert_instr(vcvtps2dq))]
11190pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
11191 unsafe {
11192 let convert: __m128i = _mm_cvtps_epi32(a);
11193 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11194 }
11195}
11196
11197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11198///
11199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
11200#[inline]
11201#[target_feature(enable = "avx512f")]
11202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11203#[cfg_attr(test, assert_instr(vcvtps2udq))]
11204pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
11205 unsafe {
11206 transmute(src:vcvtps2udq(
11207 a.as_f32x16(),
11208 src:u32x16::ZERO,
11209 mask:0b11111111_11111111,
11210 _MM_FROUND_CUR_DIRECTION,
11211 ))
11212 }
11213}
11214
11215/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11216///
11217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
11218#[inline]
11219#[target_feature(enable = "avx512f")]
11220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11221#[cfg_attr(test, assert_instr(vcvtps2udq))]
11222pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
11223 unsafe {
11224 transmute(src:vcvtps2udq(
11225 a.as_f32x16(),
11226 src.as_u32x16(),
11227 mask:k,
11228 _MM_FROUND_CUR_DIRECTION,
11229 ))
11230 }
11231}
11232
11233/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11234///
11235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
11236#[inline]
11237#[target_feature(enable = "avx512f")]
11238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11239#[cfg_attr(test, assert_instr(vcvtps2udq))]
11240pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
11241 unsafe {
11242 transmute(src:vcvtps2udq(
11243 a.as_f32x16(),
11244 src:u32x16::ZERO,
11245 mask:k,
11246 _MM_FROUND_CUR_DIRECTION,
11247 ))
11248 }
11249}
11250
11251/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11252///
11253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
11254#[inline]
11255#[target_feature(enable = "avx512f,avx512vl")]
11256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11257#[cfg_attr(test, assert_instr(vcvtps2udq))]
11258pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
11259 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
11260}
11261
11262/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11263///
11264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
11265#[inline]
11266#[target_feature(enable = "avx512f,avx512vl")]
11267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11268#[cfg_attr(test, assert_instr(vcvtps2udq))]
11269pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
11270 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
11271}
11272
11273/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11274///
11275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
11276#[inline]
11277#[target_feature(enable = "avx512f,avx512vl")]
11278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11279#[cfg_attr(test, assert_instr(vcvtps2udq))]
11280pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11281 unsafe { transmute(src:vcvtps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
11282}
11283
11284/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11285///
11286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11287#[inline]
11288#[target_feature(enable = "avx512f,avx512vl")]
11289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11290#[cfg_attr(test, assert_instr(vcvtps2udq))]
11291pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11292 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
11293}
11294
11295/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11296///
11297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11298#[inline]
11299#[target_feature(enable = "avx512f,avx512vl")]
11300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11301#[cfg_attr(test, assert_instr(vcvtps2udq))]
11302pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11303 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
11304}
11305
11306/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11307///
11308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11309#[inline]
11310#[target_feature(enable = "avx512f,avx512vl")]
11311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11312#[cfg_attr(test, assert_instr(vcvtps2udq))]
11313pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11314 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
11315}
11316
11317/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11318///
11319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
11320#[inline]
11321#[target_feature(enable = "avx512f")]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[cfg_attr(test, assert_instr(vcvtps2pd))]
11324pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11325 unsafe {
11326 transmute(src:vcvtps2pd(
11327 a.as_f32x8(),
11328 src:f64x8::ZERO,
11329 mask:0b11111111,
11330 _MM_FROUND_CUR_DIRECTION,
11331 ))
11332 }
11333}
11334
11335/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11336///
11337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11338#[inline]
11339#[target_feature(enable = "avx512f")]
11340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11341#[cfg_attr(test, assert_instr(vcvtps2pd))]
11342pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11343 unsafe {
11344 transmute(src:vcvtps2pd(
11345 a.as_f32x8(),
11346 src.as_f64x8(),
11347 mask:k,
11348 _MM_FROUND_CUR_DIRECTION,
11349 ))
11350 }
11351}
11352
11353/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11354///
11355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11356#[inline]
11357#[target_feature(enable = "avx512f")]
11358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11359#[cfg_attr(test, assert_instr(vcvtps2pd))]
11360pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11361 unsafe {
11362 transmute(src:vcvtps2pd(
11363 a.as_f32x8(),
11364 src:f64x8::ZERO,
11365 mask:k,
11366 _MM_FROUND_CUR_DIRECTION,
11367 ))
11368 }
11369}
11370
11371/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11372///
11373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11374#[inline]
11375#[target_feature(enable = "avx512f")]
11376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11377#[cfg_attr(test, assert_instr(vcvtps2pd))]
11378pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11379 unsafe {
11380 transmute(src:vcvtps2pd(
11381 a:_mm512_castps512_ps256(v2).as_f32x8(),
11382 src:f64x8::ZERO,
11383 mask:0b11111111,
11384 _MM_FROUND_CUR_DIRECTION,
11385 ))
11386 }
11387}
11388
11389/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11390///
11391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11392#[inline]
11393#[target_feature(enable = "avx512f")]
11394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11395#[cfg_attr(test, assert_instr(vcvtps2pd))]
11396pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11397 unsafe {
11398 transmute(src:vcvtps2pd(
11399 a:_mm512_castps512_ps256(v2).as_f32x8(),
11400 src.as_f64x8(),
11401 mask:k,
11402 _MM_FROUND_CUR_DIRECTION,
11403 ))
11404 }
11405}
11406
11407/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11408///
11409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
11410#[inline]
11411#[target_feature(enable = "avx512f")]
11412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11413#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11414pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11415 unsafe {
11416 transmute(src:vcvtpd2ps(
11417 a.as_f64x8(),
11418 src:f32x8::ZERO,
11419 mask:0b11111111,
11420 _MM_FROUND_CUR_DIRECTION,
11421 ))
11422 }
11423}
11424
11425/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11426///
11427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11428#[inline]
11429#[target_feature(enable = "avx512f")]
11430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11431#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11432pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11433 unsafe {
11434 transmute(src:vcvtpd2ps(
11435 a.as_f64x8(),
11436 src.as_f32x8(),
11437 mask:k,
11438 _MM_FROUND_CUR_DIRECTION,
11439 ))
11440 }
11441}
11442
11443/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11444///
11445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11446#[inline]
11447#[target_feature(enable = "avx512f")]
11448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11449#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11450pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11451 unsafe {
11452 transmute(src:vcvtpd2ps(
11453 a.as_f64x8(),
11454 src:f32x8::ZERO,
11455 mask:k,
11456 _MM_FROUND_CUR_DIRECTION,
11457 ))
11458 }
11459}
11460
11461/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11462///
11463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11464#[inline]
11465#[target_feature(enable = "avx512f,avx512vl")]
11466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11467#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11468pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11469 unsafe {
11470 let convert: __m128 = _mm256_cvtpd_ps(a);
11471 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11472 }
11473}
11474
11475/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11476///
11477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11478#[inline]
11479#[target_feature(enable = "avx512f,avx512vl")]
11480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11481#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11482pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11483 unsafe {
11484 let convert: __m128 = _mm256_cvtpd_ps(a);
11485 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11486 }
11487}
11488
11489/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11492#[inline]
11493#[target_feature(enable = "avx512f,avx512vl")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11497 unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), mask:k).as_m128() }
11498}
11499
11500/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11503#[inline]
11504#[target_feature(enable = "avx512f,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11507pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11508 unsafe {
11509 let convert: __m128 = _mm_cvtpd_ps(a);
11510 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11511 }
11512}
11513
11514/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11515///
11516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
11517#[inline]
11518#[target_feature(enable = "avx512f")]
11519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11520#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11521pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11522 unsafe {
11523 transmute(src:vcvtpd2dq(
11524 a.as_f64x8(),
11525 src:i32x8::ZERO,
11526 mask:0b11111111,
11527 _MM_FROUND_CUR_DIRECTION,
11528 ))
11529 }
11530}
11531
11532/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11533///
11534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11535#[inline]
11536#[target_feature(enable = "avx512f")]
11537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11538#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11539pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11540 unsafe {
11541 transmute(src:vcvtpd2dq(
11542 a.as_f64x8(),
11543 src.as_i32x8(),
11544 mask:k,
11545 _MM_FROUND_CUR_DIRECTION,
11546 ))
11547 }
11548}
11549
11550/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11553#[inline]
11554#[target_feature(enable = "avx512f")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11557pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11558 unsafe {
11559 transmute(src:vcvtpd2dq(
11560 a.as_f64x8(),
11561 src:i32x8::ZERO,
11562 mask:k,
11563 _MM_FROUND_CUR_DIRECTION,
11564 ))
11565 }
11566}
11567
11568/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11575pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11576 unsafe {
11577 let convert: __m128i = _mm256_cvtpd_epi32(a);
11578 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11579 }
11580}
11581
11582/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11589pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11590 unsafe {
11591 let convert: __m128i = _mm256_cvtpd_epi32(a);
11592 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11593 }
11594}
11595
11596/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11603pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11604 unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
11605}
11606
11607/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11608///
11609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11610#[inline]
11611#[target_feature(enable = "avx512f,avx512vl")]
11612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11613#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11614pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11615 unsafe {
11616 let convert: __m128i = _mm_cvtpd_epi32(a);
11617 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11618 }
11619}
11620
11621/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11622///
11623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11624#[inline]
11625#[target_feature(enable = "avx512f")]
11626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11627#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11628pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11629 unsafe {
11630 transmute(src:vcvtpd2udq(
11631 a.as_f64x8(),
11632 src:u32x8::ZERO,
11633 mask:0b11111111,
11634 _MM_FROUND_CUR_DIRECTION,
11635 ))
11636 }
11637}
11638
11639/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11646pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11647 unsafe {
11648 transmute(src:vcvtpd2udq(
11649 a.as_f64x8(),
11650 src.as_u32x8(),
11651 mask:k,
11652 _MM_FROUND_CUR_DIRECTION,
11653 ))
11654 }
11655}
11656
11657/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11658///
11659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11660#[inline]
11661#[target_feature(enable = "avx512f")]
11662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11663#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11664pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11665 unsafe {
11666 transmute(src:vcvtpd2udq(
11667 a.as_f64x8(),
11668 src:u32x8::ZERO,
11669 mask:k,
11670 _MM_FROUND_CUR_DIRECTION,
11671 ))
11672 }
11673}
11674
11675/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11676///
11677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11678#[inline]
11679#[target_feature(enable = "avx512f,avx512vl")]
11680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11681#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11682pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11683 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:0b11111111)) }
11684}
11685
11686/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11687///
11688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11689#[inline]
11690#[target_feature(enable = "avx512f,avx512vl")]
11691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11692#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11693pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11694 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), mask:k)) }
11695}
11696
11697/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11698///
11699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11700#[inline]
11701#[target_feature(enable = "avx512f,avx512vl")]
11702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11703#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11704pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11705 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:k)) }
11706}
11707
11708/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11709///
11710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11711#[inline]
11712#[target_feature(enable = "avx512f,avx512vl")]
11713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11714#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11715pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11716 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:0b11111111)) }
11717}
11718
11719/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11720///
11721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11722#[inline]
11723#[target_feature(enable = "avx512f,avx512vl")]
11724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11725#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11726pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11727 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), mask:k)) }
11728}
11729
11730/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11731///
11732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11733#[inline]
11734#[target_feature(enable = "avx512f,avx512vl")]
11735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11736#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11737pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11738 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:k)) }
11739}
11740
11741/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11742///
11743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
11744#[inline]
11745#[target_feature(enable = "avx512f")]
11746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11747#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11748pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11749 unsafe {
11750 let r: f32x8 = vcvtpd2ps(
11751 a:v2.as_f64x8(),
11752 src:f32x8::ZERO,
11753 mask:0b11111111,
11754 _MM_FROUND_CUR_DIRECTION,
11755 );
11756 simd_shuffle!(
11757 r,
11758 f32x8::ZERO,
11759 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11760 )
11761 }
11762}
11763
11764/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11765///
11766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11767#[inline]
11768#[target_feature(enable = "avx512f")]
11769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11770#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11771pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11772 unsafe {
11773 let r: f32x8 = vcvtpd2ps(
11774 a:v2.as_f64x8(),
11775 src:_mm512_castps512_ps256(src).as_f32x8(),
11776 mask:k,
11777 _MM_FROUND_CUR_DIRECTION,
11778 );
11779 simd_shuffle!(
11780 r,
11781 f32x8::ZERO,
11782 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11783 )
11784 }
11785}
11786
11787/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11788///
11789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
11790#[inline]
11791#[target_feature(enable = "avx512f")]
11792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11793#[cfg_attr(test, assert_instr(vpmovsxbd))]
11794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11795pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11796 unsafe {
11797 let a: Simd = a.as_i8x16();
11798 transmute::<i32x16, _>(src:simd_cast(a))
11799 }
11800}
11801
11802/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11803///
11804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11805#[inline]
11806#[target_feature(enable = "avx512f")]
11807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11808#[cfg_attr(test, assert_instr(vpmovsxbd))]
11809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11810pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11811 unsafe {
11812 let convert: Simd = _mm512_cvtepi8_epi32(a).as_i32x16();
11813 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11814 }
11815}
11816
11817/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11818///
11819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11820#[inline]
11821#[target_feature(enable = "avx512f")]
11822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11823#[cfg_attr(test, assert_instr(vpmovsxbd))]
11824#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11825pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11826 unsafe {
11827 let convert: Simd = _mm512_cvtepi8_epi32(a).as_i32x16();
11828 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11829 }
11830}
11831
11832/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11833///
11834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11835#[inline]
11836#[target_feature(enable = "avx512f,avx512vl")]
11837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11838#[cfg_attr(test, assert_instr(vpmovsxbd))]
11839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11840pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11841 unsafe {
11842 let convert: Simd = _mm256_cvtepi8_epi32(a).as_i32x8();
11843 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11844 }
11845}
11846
11847/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11848///
11849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11850#[inline]
11851#[target_feature(enable = "avx512f,avx512vl")]
11852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11853#[cfg_attr(test, assert_instr(vpmovsxbd))]
11854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11855pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11856 unsafe {
11857 let convert: Simd = _mm256_cvtepi8_epi32(a).as_i32x8();
11858 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11859 }
11860}
11861
11862/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11863///
11864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11865#[inline]
11866#[target_feature(enable = "avx512f,avx512vl")]
11867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11868#[cfg_attr(test, assert_instr(vpmovsxbd))]
11869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11870pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11871 unsafe {
11872 let convert: Simd = _mm_cvtepi8_epi32(a).as_i32x4();
11873 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11874 }
11875}
11876
11877/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11878///
11879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11880#[inline]
11881#[target_feature(enable = "avx512f,avx512vl")]
11882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11883#[cfg_attr(test, assert_instr(vpmovsxbd))]
11884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11885pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: Simd = _mm_cvtepi8_epi32(a).as_i32x4();
11888 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11889 }
11890}
11891
11892/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
11895#[inline]
11896#[target_feature(enable = "avx512f")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovsxbq))]
11899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11900pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11901 unsafe {
11902 let a: Simd = a.as_i8x16();
11903 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11904 transmute::<i64x8, _>(src:simd_cast(v64))
11905 }
11906}
11907
11908/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11909///
11910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11911#[inline]
11912#[target_feature(enable = "avx512f")]
11913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11914#[cfg_attr(test, assert_instr(vpmovsxbq))]
11915#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11916pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11917 unsafe {
11918 let convert: Simd = _mm512_cvtepi8_epi64(a).as_i64x8();
11919 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11920 }
11921}
11922
11923/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11924///
11925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11926#[inline]
11927#[target_feature(enable = "avx512f")]
11928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11929#[cfg_attr(test, assert_instr(vpmovsxbq))]
11930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11931pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11932 unsafe {
11933 let convert: Simd = _mm512_cvtepi8_epi64(a).as_i64x8();
11934 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11935 }
11936}
11937
11938/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11939///
11940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11941#[inline]
11942#[target_feature(enable = "avx512f,avx512vl")]
11943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11944#[cfg_attr(test, assert_instr(vpmovsxbq))]
11945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11946pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11947 unsafe {
11948 let convert: Simd = _mm256_cvtepi8_epi64(a).as_i64x4();
11949 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11950 }
11951}
11952
11953/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11954///
11955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11956#[inline]
11957#[target_feature(enable = "avx512f,avx512vl")]
11958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11959#[cfg_attr(test, assert_instr(vpmovsxbq))]
11960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11961pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11962 unsafe {
11963 let convert: Simd = _mm256_cvtepi8_epi64(a).as_i64x4();
11964 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11965 }
11966}
11967
11968/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11969///
11970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11971#[inline]
11972#[target_feature(enable = "avx512f,avx512vl")]
11973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11974#[cfg_attr(test, assert_instr(vpmovsxbq))]
11975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11976pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11977 unsafe {
11978 let convert: Simd = _mm_cvtepi8_epi64(a).as_i64x2();
11979 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11980 }
11981}
11982
11983/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11984///
11985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11986#[inline]
11987#[target_feature(enable = "avx512f,avx512vl")]
11988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11989#[cfg_attr(test, assert_instr(vpmovsxbq))]
11990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11991pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11992 unsafe {
11993 let convert: Simd = _mm_cvtepi8_epi64(a).as_i64x2();
11994 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11995 }
11996}
11997
11998/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11999///
12000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
12001#[inline]
12002#[target_feature(enable = "avx512f")]
12003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12004#[cfg_attr(test, assert_instr(vpmovzxbd))]
12005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12006pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
12007 unsafe {
12008 let a: Simd = a.as_u8x16();
12009 transmute::<i32x16, _>(src:simd_cast(a))
12010 }
12011}
12012
12013/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12014///
12015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
12016#[inline]
12017#[target_feature(enable = "avx512f")]
12018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12019#[cfg_attr(test, assert_instr(vpmovzxbd))]
12020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12021pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
12022 unsafe {
12023 let convert: Simd = _mm512_cvtepu8_epi32(a).as_i32x16();
12024 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12025 }
12026}
12027
12028/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12029///
12030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
12031#[inline]
12032#[target_feature(enable = "avx512f")]
12033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12034#[cfg_attr(test, assert_instr(vpmovzxbd))]
12035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12036pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
12037 unsafe {
12038 let convert: Simd = _mm512_cvtepu8_epi32(a).as_i32x16();
12039 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12040 }
12041}
12042
12043/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12044///
12045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
12046#[inline]
12047#[target_feature(enable = "avx512f,avx512vl")]
12048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12049#[cfg_attr(test, assert_instr(vpmovzxbd))]
12050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12051pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12052 unsafe {
12053 let convert: Simd = _mm256_cvtepu8_epi32(a).as_i32x8();
12054 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12055 }
12056}
12057
12058/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12059///
12060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
12061#[inline]
12062#[target_feature(enable = "avx512f,avx512vl")]
12063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12064#[cfg_attr(test, assert_instr(vpmovzxbd))]
12065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12066pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
12067 unsafe {
12068 let convert: Simd = _mm256_cvtepu8_epi32(a).as_i32x8();
12069 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12070 }
12071}
12072
12073/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12074///
12075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
12076#[inline]
12077#[target_feature(enable = "avx512f,avx512vl")]
12078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12079#[cfg_attr(test, assert_instr(vpmovzxbd))]
12080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12081pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082 unsafe {
12083 let convert: Simd = _mm_cvtepu8_epi32(a).as_i32x4();
12084 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12085 }
12086}
12087
12088/// Zero extend packed unsigned 8-bit integers in th elow 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovzxbd))]
12095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12096pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
12097 unsafe {
12098 let convert: Simd = _mm_cvtepu8_epi32(a).as_i32x4();
12099 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12100 }
12101}
12102
12103/// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst.
12104///
12105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
12106#[inline]
12107#[target_feature(enable = "avx512f")]
12108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12109#[cfg_attr(test, assert_instr(vpmovzxbq))]
12110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12111pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
12112 unsafe {
12113 let a: Simd = a.as_u8x16();
12114 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
12115 transmute::<i64x8, _>(src:simd_cast(v64))
12116 }
12117}
12118
12119/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12120///
12121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
12122#[inline]
12123#[target_feature(enable = "avx512f")]
12124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12125#[cfg_attr(test, assert_instr(vpmovzxbq))]
12126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12127pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12128 unsafe {
12129 let convert: Simd = _mm512_cvtepu8_epi64(a).as_i64x8();
12130 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12131 }
12132}
12133
12134/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12135///
12136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
12137#[inline]
12138#[target_feature(enable = "avx512f")]
12139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12140#[cfg_attr(test, assert_instr(vpmovzxbq))]
12141#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12142pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
12143 unsafe {
12144 let convert: Simd = _mm512_cvtepu8_epi64(a).as_i64x8();
12145 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12146 }
12147}
12148
12149/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12150///
12151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
12152#[inline]
12153#[target_feature(enable = "avx512f,avx512vl")]
12154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12155#[cfg_attr(test, assert_instr(vpmovzxbq))]
12156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12157pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12158 unsafe {
12159 let convert: Simd = _mm256_cvtepu8_epi64(a).as_i64x4();
12160 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12161 }
12162}
12163
12164/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12165///
12166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
12167#[inline]
12168#[target_feature(enable = "avx512f,avx512vl")]
12169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12170#[cfg_attr(test, assert_instr(vpmovzxbq))]
12171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12172pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
12173 unsafe {
12174 let convert: Simd = _mm256_cvtepu8_epi64(a).as_i64x4();
12175 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12176 }
12177}
12178
12179/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12180///
12181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
12182#[inline]
12183#[target_feature(enable = "avx512f,avx512vl")]
12184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12185#[cfg_attr(test, assert_instr(vpmovzxbq))]
12186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12187pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12188 unsafe {
12189 let convert: Simd = _mm_cvtepu8_epi64(a).as_i64x2();
12190 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12191 }
12192}
12193
12194/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12195///
12196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
12197#[inline]
12198#[target_feature(enable = "avx512f,avx512vl")]
12199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12200#[cfg_attr(test, assert_instr(vpmovzxbq))]
12201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12202pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
12203 unsafe {
12204 let convert: Simd = _mm_cvtepu8_epi64(a).as_i64x2();
12205 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12206 }
12207}
12208
12209/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12210///
12211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
12212#[inline]
12213#[target_feature(enable = "avx512f")]
12214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12215#[cfg_attr(test, assert_instr(vpmovsxwd))]
12216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12217pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
12218 unsafe {
12219 let a: Simd = a.as_i16x16();
12220 transmute::<i32x16, _>(src:simd_cast(a))
12221 }
12222}
12223
12224/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12225///
12226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
12227#[inline]
12228#[target_feature(enable = "avx512f")]
12229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12230#[cfg_attr(test, assert_instr(vpmovsxwd))]
12231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12232pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12233 unsafe {
12234 let convert: Simd = _mm512_cvtepi16_epi32(a).as_i32x16();
12235 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12236 }
12237}
12238
12239/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12240///
12241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
12242#[inline]
12243#[target_feature(enable = "avx512f")]
12244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12245#[cfg_attr(test, assert_instr(vpmovsxwd))]
12246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12247pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12248 unsafe {
12249 let convert: Simd = _mm512_cvtepi16_epi32(a).as_i32x16();
12250 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12251 }
12252}
12253
12254/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12255///
12256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
12257#[inline]
12258#[target_feature(enable = "avx512f,avx512vl")]
12259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12260#[cfg_attr(test, assert_instr(vpmovsxwd))]
12261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12262pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12263 unsafe {
12264 let convert: Simd = _mm256_cvtepi16_epi32(a).as_i32x8();
12265 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12266 }
12267}
12268
12269/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12270///
12271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
12272#[inline]
12273#[target_feature(enable = "avx512f,avx512vl")]
12274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12275#[cfg_attr(test, assert_instr(vpmovsxwd))]
12276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12277pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12278 unsafe {
12279 let convert: Simd = _mm256_cvtepi16_epi32(a).as_i32x8();
12280 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12281 }
12282}
12283
12284/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovsxwd))]
12291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12292pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12293 unsafe {
12294 let convert: Simd = _mm_cvtepi16_epi32(a).as_i32x4();
12295 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12296 }
12297}
12298
12299/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12300///
12301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
12302#[inline]
12303#[target_feature(enable = "avx512f,avx512vl")]
12304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12305#[cfg_attr(test, assert_instr(vpmovsxwd))]
12306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12307pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12308 unsafe {
12309 let convert: Simd = _mm_cvtepi16_epi32(a).as_i32x4();
12310 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12311 }
12312}
12313
12314/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12315///
12316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12317#[inline]
12318#[target_feature(enable = "avx512f")]
12319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12320#[cfg_attr(test, assert_instr(vpmovsxwq))]
12321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12322pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12323 unsafe {
12324 let a: Simd = a.as_i16x8();
12325 transmute::<i64x8, _>(src:simd_cast(a))
12326 }
12327}
12328
12329/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12330///
12331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12332#[inline]
12333#[target_feature(enable = "avx512f")]
12334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12335#[cfg_attr(test, assert_instr(vpmovsxwq))]
12336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12337pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12338 unsafe {
12339 let convert: Simd = _mm512_cvtepi16_epi64(a).as_i64x8();
12340 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12341 }
12342}
12343
12344/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12345///
12346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12347#[inline]
12348#[target_feature(enable = "avx512f")]
12349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12350#[cfg_attr(test, assert_instr(vpmovsxwq))]
12351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12352pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12353 unsafe {
12354 let convert: Simd = _mm512_cvtepi16_epi64(a).as_i64x8();
12355 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12356 }
12357}
12358
12359/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12360///
12361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12362#[inline]
12363#[target_feature(enable = "avx512f,avx512vl")]
12364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12365#[cfg_attr(test, assert_instr(vpmovsxwq))]
12366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12367pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12368 unsafe {
12369 let convert: Simd = _mm256_cvtepi16_epi64(a).as_i64x4();
12370 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12371 }
12372}
12373
12374/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12377#[inline]
12378#[target_feature(enable = "avx512f,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[cfg_attr(test, assert_instr(vpmovsxwq))]
12381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12382pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12383 unsafe {
12384 let convert: Simd = _mm256_cvtepi16_epi64(a).as_i64x4();
12385 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12386 }
12387}
12388
12389/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12390///
12391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12392#[inline]
12393#[target_feature(enable = "avx512f,avx512vl")]
12394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12395#[cfg_attr(test, assert_instr(vpmovsxwq))]
12396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12397pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12398 unsafe {
12399 let convert: Simd = _mm_cvtepi16_epi64(a).as_i64x2();
12400 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12401 }
12402}
12403
12404/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12405///
12406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12407#[inline]
12408#[target_feature(enable = "avx512f,avx512vl")]
12409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12410#[cfg_attr(test, assert_instr(vpmovsxwq))]
12411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12412pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12413 unsafe {
12414 let convert: Simd = _mm_cvtepi16_epi64(a).as_i64x2();
12415 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12416 }
12417}
12418
12419/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12420///
12421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12422#[inline]
12423#[target_feature(enable = "avx512f")]
12424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12425#[cfg_attr(test, assert_instr(vpmovzxwd))]
12426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12427pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12428 unsafe {
12429 let a: Simd = a.as_u16x16();
12430 transmute::<i32x16, _>(src:simd_cast(a))
12431 }
12432}
12433
12434/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12435///
12436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12437#[inline]
12438#[target_feature(enable = "avx512f")]
12439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12440#[cfg_attr(test, assert_instr(vpmovzxwd))]
12441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12442pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12443 unsafe {
12444 let convert: Simd = _mm512_cvtepu16_epi32(a).as_i32x16();
12445 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
12446 }
12447}
12448
12449/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12450///
12451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12452#[inline]
12453#[target_feature(enable = "avx512f")]
12454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12455#[cfg_attr(test, assert_instr(vpmovzxwd))]
12456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12457pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12458 unsafe {
12459 let convert: Simd = _mm512_cvtepu16_epi32(a).as_i32x16();
12460 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
12461 }
12462}
12463
12464/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12465///
12466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12467#[inline]
12468#[target_feature(enable = "avx512f,avx512vl")]
12469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12470#[cfg_attr(test, assert_instr(vpmovzxwd))]
12471#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12472pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12473 unsafe {
12474 let convert: Simd = _mm256_cvtepu16_epi32(a).as_i32x8();
12475 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
12476 }
12477}
12478
12479/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12480///
12481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12482#[inline]
12483#[target_feature(enable = "avx512f,avx512vl")]
12484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12485#[cfg_attr(test, assert_instr(vpmovzxwd))]
12486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12487pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12488 unsafe {
12489 let convert: Simd = _mm256_cvtepu16_epi32(a).as_i32x8();
12490 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
12491 }
12492}
12493
12494/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12497#[inline]
12498#[target_feature(enable = "avx512f,avx512vl")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vpmovzxwd))]
12501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12502pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12503 unsafe {
12504 let convert: Simd = _mm_cvtepu16_epi32(a).as_i32x4();
12505 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
12506 }
12507}
12508
12509/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12510///
12511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12512#[inline]
12513#[target_feature(enable = "avx512f,avx512vl")]
12514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12515#[cfg_attr(test, assert_instr(vpmovzxwd))]
12516#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12517pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12518 unsafe {
12519 let convert: Simd = _mm_cvtepu16_epi32(a).as_i32x4();
12520 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12521 }
12522}
12523
12524/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12525///
12526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12527#[inline]
12528#[target_feature(enable = "avx512f")]
12529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12530#[cfg_attr(test, assert_instr(vpmovzxwq))]
12531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12532pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12533 unsafe {
12534 let a: Simd = a.as_u16x8();
12535 transmute::<i64x8, _>(src:simd_cast(a))
12536 }
12537}
12538
12539/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12540///
12541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12542#[inline]
12543#[target_feature(enable = "avx512f")]
12544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12545#[cfg_attr(test, assert_instr(vpmovzxwq))]
12546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12547pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12548 unsafe {
12549 let convert: Simd = _mm512_cvtepu16_epi64(a).as_i64x8();
12550 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12551 }
12552}
12553
12554/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12555///
12556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12557#[inline]
12558#[target_feature(enable = "avx512f")]
12559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12560#[cfg_attr(test, assert_instr(vpmovzxwq))]
12561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12562pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12563 unsafe {
12564 let convert: Simd = _mm512_cvtepu16_epi64(a).as_i64x8();
12565 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12566 }
12567}
12568
12569/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12570///
12571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12572#[inline]
12573#[target_feature(enable = "avx512f,avx512vl")]
12574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12575#[cfg_attr(test, assert_instr(vpmovzxwq))]
12576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12577pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12578 unsafe {
12579 let convert: Simd = _mm256_cvtepu16_epi64(a).as_i64x4();
12580 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12581 }
12582}
12583
12584/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12585///
12586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12587#[inline]
12588#[target_feature(enable = "avx512f,avx512vl")]
12589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12590#[cfg_attr(test, assert_instr(vpmovzxwq))]
12591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12592pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12593 unsafe {
12594 let convert: Simd = _mm256_cvtepu16_epi64(a).as_i64x4();
12595 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12596 }
12597}
12598
12599/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12600///
12601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12602#[inline]
12603#[target_feature(enable = "avx512f,avx512vl")]
12604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12605#[cfg_attr(test, assert_instr(vpmovzxwq))]
12606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12607pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12608 unsafe {
12609 let convert: Simd = _mm_cvtepu16_epi64(a).as_i64x2();
12610 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12611 }
12612}
12613
12614/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12615///
12616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12617#[inline]
12618#[target_feature(enable = "avx512f,avx512vl")]
12619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12620#[cfg_attr(test, assert_instr(vpmovzxwq))]
12621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12622pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12623 unsafe {
12624 let convert: Simd = _mm_cvtepu16_epi64(a).as_i64x2();
12625 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12626 }
12627}
12628
12629/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12630///
12631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12632#[inline]
12633#[target_feature(enable = "avx512f")]
12634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12635#[cfg_attr(test, assert_instr(vpmovsxdq))]
12636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12637pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12638 unsafe {
12639 let a: Simd = a.as_i32x8();
12640 transmute::<i64x8, _>(src:simd_cast(a))
12641 }
12642}
12643
12644/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12645///
12646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12647#[inline]
12648#[target_feature(enable = "avx512f")]
12649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12650#[cfg_attr(test, assert_instr(vpmovsxdq))]
12651#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12652pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12653 unsafe {
12654 let convert: Simd = _mm512_cvtepi32_epi64(a).as_i64x8();
12655 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12656 }
12657}
12658
12659/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12660///
12661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12662#[inline]
12663#[target_feature(enable = "avx512f")]
12664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12665#[cfg_attr(test, assert_instr(vpmovsxdq))]
12666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12667pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12668 unsafe {
12669 let convert: Simd = _mm512_cvtepi32_epi64(a).as_i64x8();
12670 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12671 }
12672}
12673
12674/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12675///
12676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12677#[inline]
12678#[target_feature(enable = "avx512f,avx512vl")]
12679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12680#[cfg_attr(test, assert_instr(vpmovsxdq))]
12681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12682pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12683 unsafe {
12684 let convert: Simd = _mm256_cvtepi32_epi64(a).as_i64x4();
12685 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12686 }
12687}
12688
12689/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12690///
12691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12692#[inline]
12693#[target_feature(enable = "avx512f,avx512vl")]
12694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12695#[cfg_attr(test, assert_instr(vpmovsxdq))]
12696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12697pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12698 unsafe {
12699 let convert: Simd = _mm256_cvtepi32_epi64(a).as_i64x4();
12700 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12701 }
12702}
12703
12704/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12707#[inline]
12708#[target_feature(enable = "avx512f,avx512vl")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vpmovsxdq))]
12711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12712pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12713 unsafe {
12714 let convert: Simd = _mm_cvtepi32_epi64(a).as_i64x2();
12715 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12716 }
12717}
12718
12719/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12720///
12721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12722#[inline]
12723#[target_feature(enable = "avx512f,avx512vl")]
12724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12725#[cfg_attr(test, assert_instr(vpmovsxdq))]
12726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12727pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12728 unsafe {
12729 let convert: Simd = _mm_cvtepi32_epi64(a).as_i64x2();
12730 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12731 }
12732}
12733
12734/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12735///
12736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12737#[inline]
12738#[target_feature(enable = "avx512f")]
12739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12740#[cfg_attr(test, assert_instr(vpmovzxdq))]
12741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12742pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12743 unsafe {
12744 let a: Simd = a.as_u32x8();
12745 transmute::<i64x8, _>(src:simd_cast(a))
12746 }
12747}
12748
12749/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12750///
12751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12752#[inline]
12753#[target_feature(enable = "avx512f")]
12754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12755#[cfg_attr(test, assert_instr(vpmovzxdq))]
12756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12757pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12758 unsafe {
12759 let convert: Simd = _mm512_cvtepu32_epi64(a).as_i64x8();
12760 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
12761 }
12762}
12763
12764/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12765///
12766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12767#[inline]
12768#[target_feature(enable = "avx512f")]
12769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12770#[cfg_attr(test, assert_instr(vpmovzxdq))]
12771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12772pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12773 unsafe {
12774 let convert: Simd = _mm512_cvtepu32_epi64(a).as_i64x8();
12775 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
12776 }
12777}
12778
12779/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12780///
12781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12782#[inline]
12783#[target_feature(enable = "avx512f,avx512vl")]
12784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12785#[cfg_attr(test, assert_instr(vpmovzxdq))]
12786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12787pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12788 unsafe {
12789 let convert: Simd = _mm256_cvtepu32_epi64(a).as_i64x4();
12790 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
12791 }
12792}
12793
12794/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12795///
12796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12797#[inline]
12798#[target_feature(enable = "avx512f,avx512vl")]
12799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12800#[cfg_attr(test, assert_instr(vpmovzxdq))]
12801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12802pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12803 unsafe {
12804 let convert: Simd = _mm256_cvtepu32_epi64(a).as_i64x4();
12805 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
12806 }
12807}
12808
12809/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12810///
12811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12812#[inline]
12813#[target_feature(enable = "avx512f,avx512vl")]
12814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12815#[cfg_attr(test, assert_instr(vpmovzxdq))]
12816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12817pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12818 unsafe {
12819 let convert: Simd = _mm_cvtepu32_epi64(a).as_i64x2();
12820 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
12821 }
12822}
12823
12824/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12825///
12826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12827#[inline]
12828#[target_feature(enable = "avx512f,avx512vl")]
12829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12830#[cfg_attr(test, assert_instr(vpmovzxdq))]
12831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12832pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12833 unsafe {
12834 let convert: Simd = _mm_cvtepu32_epi64(a).as_i64x2();
12835 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
12836 }
12837}
12838
12839/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12840///
12841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12842#[inline]
12843#[target_feature(enable = "avx512f")]
12844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12845#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12847pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12848 unsafe {
12849 let a: Simd = a.as_i32x16();
12850 transmute::<f32x16, _>(src:simd_cast(a))
12851 }
12852}
12853
12854/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12855///
12856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12857#[inline]
12858#[target_feature(enable = "avx512f")]
12859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12860#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12862pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12863 unsafe {
12864 let convert: Simd = _mm512_cvtepi32_ps(a).as_f32x16();
12865 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x16()))
12866 }
12867}
12868
12869/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12870///
12871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12872#[inline]
12873#[target_feature(enable = "avx512f")]
12874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12875#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12877pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12878 unsafe {
12879 let convert: Simd = _mm512_cvtepi32_ps(a).as_f32x16();
12880 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x16::ZERO))
12881 }
12882}
12883
12884/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12885///
12886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12887#[inline]
12888#[target_feature(enable = "avx512f,avx512vl")]
12889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12890#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12892pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12893 unsafe {
12894 let convert: Simd = _mm256_cvtepi32_ps(a).as_f32x8();
12895 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x8()))
12896 }
12897}
12898
12899/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12900///
12901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12902#[inline]
12903#[target_feature(enable = "avx512f,avx512vl")]
12904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12905#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12907pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12908 unsafe {
12909 let convert: Simd = _mm256_cvtepi32_ps(a).as_f32x8();
12910 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x8::ZERO))
12911 }
12912}
12913
12914/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12915///
12916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12917#[inline]
12918#[target_feature(enable = "avx512f,avx512vl")]
12919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12920#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12922pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12923 unsafe {
12924 let convert: Simd = _mm_cvtepi32_ps(a).as_f32x4();
12925 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x4()))
12926 }
12927}
12928
12929/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12930///
12931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12932#[inline]
12933#[target_feature(enable = "avx512f,avx512vl")]
12934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12935#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12937pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12938 unsafe {
12939 let convert: Simd = _mm_cvtepi32_ps(a).as_f32x4();
12940 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x4::ZERO))
12941 }
12942}
12943
12944/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12945///
12946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12947#[inline]
12948#[target_feature(enable = "avx512f")]
12949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12950#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12952pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12953 unsafe {
12954 let a: Simd = a.as_i32x8();
12955 transmute::<f64x8, _>(src:simd_cast(a))
12956 }
12957}
12958
12959/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12962#[inline]
12963#[target_feature(enable = "avx512f")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12967pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12968 unsafe {
12969 let convert: Simd = _mm512_cvtepi32_pd(a).as_f64x8();
12970 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
12971 }
12972}
12973
12974/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12975///
12976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12977#[inline]
12978#[target_feature(enable = "avx512f")]
12979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12980#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12982pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12983 unsafe {
12984 let convert: Simd = _mm512_cvtepi32_pd(a).as_f64x8();
12985 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x8::ZERO))
12986 }
12987}
12988
12989/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12990///
12991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12992#[inline]
12993#[target_feature(enable = "avx512f,avx512vl")]
12994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12995#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12997pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12998 unsafe {
12999 let convert: Simd = _mm256_cvtepi32_pd(a).as_f64x4();
13000 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x4()))
13001 }
13002}
13003
13004/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13005///
13006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
13007#[inline]
13008#[target_feature(enable = "avx512f,avx512vl")]
13009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13010#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13012pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
13013 unsafe {
13014 let convert: Simd = _mm256_cvtepi32_pd(a).as_f64x4();
13015 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x4::ZERO))
13016 }
13017}
13018
13019/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13020///
13021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
13022#[inline]
13023#[target_feature(enable = "avx512f,avx512vl")]
13024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13025#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13027pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13028 unsafe {
13029 let convert: Simd = _mm_cvtepi32_pd(a).as_f64x2();
13030 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x2()))
13031 }
13032}
13033
13034/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
13037#[inline]
13038#[target_feature(enable = "avx512f,avx512vl")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13042pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
13043 unsafe {
13044 let convert: Simd = _mm_cvtepi32_pd(a).as_f64x2();
13045 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x2::ZERO))
13046 }
13047}
13048
13049/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
13050///
13051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
13052#[inline]
13053#[target_feature(enable = "avx512f")]
13054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13055#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13057pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
13058 unsafe {
13059 let a: Simd = a.as_u32x16();
13060 transmute::<f32x16, _>(src:simd_cast(a))
13061 }
13062}
13063
13064/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13065///
13066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
13067#[inline]
13068#[target_feature(enable = "avx512f")]
13069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13070#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13072pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
13073 unsafe {
13074 let convert: Simd = _mm512_cvtepu32_ps(a).as_f32x16();
13075 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f32x16()))
13076 }
13077}
13078
13079/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13080///
13081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
13082#[inline]
13083#[target_feature(enable = "avx512f")]
13084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13085#[cfg_attr(test, assert_instr(vcvtudq2ps))]
13086#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13087pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
13088 unsafe {
13089 let convert: Simd = _mm512_cvtepu32_ps(a).as_f32x16();
13090 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f32x16::ZERO))
13091 }
13092}
13093
13094/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13095///
13096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
13097#[inline]
13098#[target_feature(enable = "avx512f")]
13099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13100#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13102pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
13103 unsafe {
13104 let a: Simd = a.as_u32x8();
13105 transmute::<f64x8, _>(src:simd_cast(a))
13106 }
13107}
13108
13109/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
13112#[inline]
13113#[target_feature(enable = "avx512f")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13117pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
13118 unsafe {
13119 let convert: Simd = _mm512_cvtepu32_pd(a).as_f64x8();
13120 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13121 }
13122}
13123
13124/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13125///
13126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
13127#[inline]
13128#[target_feature(enable = "avx512f")]
13129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13130#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13131#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13132pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
13133 unsafe {
13134 let convert: Simd = _mm512_cvtepu32_pd(a).as_f64x8();
13135 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x8::ZERO))
13136 }
13137}
13138
13139/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13140///
13141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
13142#[inline]
13143#[target_feature(enable = "avx512f,avx512vl")]
13144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13145#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13147pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
13148 unsafe {
13149 let a: Simd = a.as_u32x4();
13150 transmute::<f64x4, _>(src:simd_cast(a))
13151 }
13152}
13153
13154/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13155///
13156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
13157#[inline]
13158#[target_feature(enable = "avx512f,avx512vl")]
13159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13160#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13162pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
13163 unsafe {
13164 let convert: Simd = _mm256_cvtepu32_pd(a).as_f64x4();
13165 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x4()))
13166 }
13167}
13168
13169/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13170///
13171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
13172#[inline]
13173#[target_feature(enable = "avx512f,avx512vl")]
13174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13175#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13177pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
13178 unsafe {
13179 let convert: Simd = _mm256_cvtepu32_pd(a).as_f64x4();
13180 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x4::ZERO))
13181 }
13182}
13183
13184/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13190#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13192pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
13193 unsafe {
13194 let a: Simd = a.as_u32x4();
13195 let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
13196 transmute::<f64x2, _>(src:simd_cast(u64))
13197 }
13198}
13199
13200/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13201///
13202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
13203#[inline]
13204#[target_feature(enable = "avx512f,avx512vl")]
13205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13206#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13208pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
13209 unsafe {
13210 let convert: Simd = _mm_cvtepu32_pd(a).as_f64x2();
13211 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x2()))
13212 }
13213}
13214
13215/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13216///
13217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
13218#[inline]
13219#[target_feature(enable = "avx512f,avx512vl")]
13220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13221#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13223pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
13224 unsafe {
13225 let convert: Simd = _mm_cvtepu32_pd(a).as_f64x2();
13226 transmute(src:simd_select_bitmask(m:k, yes:convert, no:f64x2::ZERO))
13227 }
13228}
13229
13230/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13231///
13232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
13233#[inline]
13234#[target_feature(enable = "avx512f")]
13235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13236#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13238pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
13239 unsafe {
13240 let v2: Simd = v2.as_i32x16();
13241 let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13242 transmute::<f64x8, _>(src:simd_cast(v256))
13243 }
13244}
13245
13246/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13247///
13248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
13249#[inline]
13250#[target_feature(enable = "avx512f")]
13251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13252#[cfg_attr(test, assert_instr(vcvtdq2pd))]
13253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13254pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13255 unsafe {
13256 let convert: Simd = _mm512_cvtepi32lo_pd(v2).as_f64x8();
13257 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13258 }
13259}
13260
13261/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
13262///
13263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
13264#[inline]
13265#[target_feature(enable = "avx512f")]
13266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13267#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13269pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
13270 unsafe {
13271 let v2: Simd = v2.as_u32x16();
13272 let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
13273 transmute::<f64x8, _>(src:simd_cast(v256))
13274 }
13275}
13276
13277/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13278///
13279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
13280#[inline]
13281#[target_feature(enable = "avx512f")]
13282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13283#[cfg_attr(test, assert_instr(vcvtudq2pd))]
13284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13285pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
13286 unsafe {
13287 let convert: Simd = _mm512_cvtepu32lo_pd(v2).as_f64x8();
13288 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_f64x8()))
13289 }
13290}
13291
13292/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13293///
13294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
13295#[inline]
13296#[target_feature(enable = "avx512f")]
13297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13298#[cfg_attr(test, assert_instr(vpmovdw))]
13299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13300pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
13301 unsafe {
13302 let a: Simd = a.as_i32x16();
13303 transmute::<i16x16, _>(src:simd_cast(a))
13304 }
13305}
13306
13307/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13308///
13309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
13310#[inline]
13311#[target_feature(enable = "avx512f")]
13312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13313#[cfg_attr(test, assert_instr(vpmovdw))]
13314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13315pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13316 unsafe {
13317 let convert: Simd = _mm512_cvtepi32_epi16(a).as_i16x16();
13318 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x16()))
13319 }
13320}
13321
13322/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13323///
13324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
13325#[inline]
13326#[target_feature(enable = "avx512f")]
13327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13328#[cfg_attr(test, assert_instr(vpmovdw))]
13329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13330pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13331 unsafe {
13332 let convert: Simd = _mm512_cvtepi32_epi16(a).as_i16x16();
13333 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x16::ZERO))
13334 }
13335}
13336
13337/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13338///
13339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
13340#[inline]
13341#[target_feature(enable = "avx512f,avx512vl")]
13342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13343#[cfg_attr(test, assert_instr(vpmovdw))]
13344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13345pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
13346 unsafe {
13347 let a: Simd = a.as_i32x8();
13348 transmute::<i16x8, _>(src:simd_cast(a))
13349 }
13350}
13351
13352/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13353///
13354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
13355#[inline]
13356#[target_feature(enable = "avx512f,avx512vl")]
13357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13358#[cfg_attr(test, assert_instr(vpmovdw))]
13359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13360pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13361 unsafe {
13362 let convert: Simd = _mm256_cvtepi32_epi16(a).as_i16x8();
13363 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13364 }
13365}
13366
13367/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
13370#[inline]
13371#[target_feature(enable = "avx512f,avx512vl")]
13372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13373#[cfg_attr(test, assert_instr(vpmovdw))]
13374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13375pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13376 unsafe {
13377 let convert: Simd = _mm256_cvtepi32_epi16(a).as_i16x8();
13378 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13379 }
13380}
13381
13382/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13383///
13384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13385#[inline]
13386#[target_feature(enable = "avx512f,avx512vl")]
13387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13388#[cfg_attr(test, assert_instr(vpmovdw))]
13389pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13390 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13391}
13392
13393/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13394///
13395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13396#[inline]
13397#[target_feature(enable = "avx512f,avx512vl")]
13398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13399#[cfg_attr(test, assert_instr(vpmovdw))]
13400pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13401 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13402}
13403
13404/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13405///
13406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13407#[inline]
13408#[target_feature(enable = "avx512f,avx512vl")]
13409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13410#[cfg_attr(test, assert_instr(vpmovdw))]
13411pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13412 unsafe { transmute(src:vpmovdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13413}
13414
13415/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13416///
13417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13418#[inline]
13419#[target_feature(enable = "avx512f")]
13420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13421#[cfg_attr(test, assert_instr(vpmovdb))]
13422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13423pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13424 unsafe {
13425 let a: Simd = a.as_i32x16();
13426 transmute::<i8x16, _>(src:simd_cast(a))
13427 }
13428}
13429
13430/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13431///
13432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13433#[inline]
13434#[target_feature(enable = "avx512f")]
13435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13436#[cfg_attr(test, assert_instr(vpmovdb))]
13437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13438pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13439 unsafe {
13440 let convert: Simd = _mm512_cvtepi32_epi8(a).as_i8x16();
13441 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i8x16()))
13442 }
13443}
13444
13445/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13446///
13447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13448#[inline]
13449#[target_feature(enable = "avx512f")]
13450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13451#[cfg_attr(test, assert_instr(vpmovdb))]
13452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13453pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13454 unsafe {
13455 let convert: Simd = _mm512_cvtepi32_epi8(a).as_i8x16();
13456 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i8x16::ZERO))
13457 }
13458}
13459
13460/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13461///
13462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13463#[inline]
13464#[target_feature(enable = "avx512f,avx512vl")]
13465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13466#[cfg_attr(test, assert_instr(vpmovdb))]
13467pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13468 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
13469}
13470
13471/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13472///
13473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13474#[inline]
13475#[target_feature(enable = "avx512f,avx512vl")]
13476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13477#[cfg_attr(test, assert_instr(vpmovdb))]
13478pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13479 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
13480}
13481
13482/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13483///
13484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13485#[inline]
13486#[target_feature(enable = "avx512f,avx512vl")]
13487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13488#[cfg_attr(test, assert_instr(vpmovdb))]
13489pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13490 unsafe { transmute(src:vpmovdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
13491}
13492
13493/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13494///
13495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13496#[inline]
13497#[target_feature(enable = "avx512f,avx512vl")]
13498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13499#[cfg_attr(test, assert_instr(vpmovdb))]
13500pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13501 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
13502}
13503
13504/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13505///
13506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13507#[inline]
13508#[target_feature(enable = "avx512f,avx512vl")]
13509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13510#[cfg_attr(test, assert_instr(vpmovdb))]
13511pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13512 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
13513}
13514
13515/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13516///
13517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13518#[inline]
13519#[target_feature(enable = "avx512f,avx512vl")]
13520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13521#[cfg_attr(test, assert_instr(vpmovdb))]
13522pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13523 unsafe { transmute(src:vpmovdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
13524}
13525
13526/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13527///
13528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13529#[inline]
13530#[target_feature(enable = "avx512f")]
13531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13532#[cfg_attr(test, assert_instr(vpmovqd))]
13533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13534pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13535 unsafe {
13536 let a: Simd = a.as_i64x8();
13537 transmute::<i32x8, _>(src:simd_cast(a))
13538 }
13539}
13540
13541/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13542///
13543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13544#[inline]
13545#[target_feature(enable = "avx512f")]
13546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13547#[cfg_attr(test, assert_instr(vpmovqd))]
13548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13549pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13550 unsafe {
13551 let convert: Simd = _mm512_cvtepi64_epi32(a).as_i32x8();
13552 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
13553 }
13554}
13555
13556/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13557///
13558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13559#[inline]
13560#[target_feature(enable = "avx512f")]
13561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13562#[cfg_attr(test, assert_instr(vpmovqd))]
13563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13564pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13565 unsafe {
13566 let convert: Simd = _mm512_cvtepi64_epi32(a).as_i32x8();
13567 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
13568 }
13569}
13570
13571/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13572///
13573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13574#[inline]
13575#[target_feature(enable = "avx512f,avx512vl")]
13576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13577#[cfg_attr(test, assert_instr(vpmovqd))]
13578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13579pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13580 unsafe {
13581 let a: Simd = a.as_i64x4();
13582 transmute::<i32x4, _>(src:simd_cast(a))
13583 }
13584}
13585
13586/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13587///
13588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13589#[inline]
13590#[target_feature(enable = "avx512f,avx512vl")]
13591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13592#[cfg_attr(test, assert_instr(vpmovqd))]
13593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13594pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13595 unsafe {
13596 let convert: Simd = _mm256_cvtepi64_epi32(a).as_i32x4();
13597 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
13598 }
13599}
13600
13601/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13602///
13603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13604#[inline]
13605#[target_feature(enable = "avx512f,avx512vl")]
13606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13607#[cfg_attr(test, assert_instr(vpmovqd))]
13608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13609pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13610 unsafe {
13611 let convert: Simd = _mm256_cvtepi64_epi32(a).as_i32x4();
13612 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
13613 }
13614}
13615
13616/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13617///
13618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13619#[inline]
13620#[target_feature(enable = "avx512f,avx512vl")]
13621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13622#[cfg_attr(test, assert_instr(vpmovqd))]
13623pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13624 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13625}
13626
13627/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13628///
13629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13630#[inline]
13631#[target_feature(enable = "avx512f,avx512vl")]
13632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13633#[cfg_attr(test, assert_instr(vpmovqd))]
13634pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13635 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13636}
13637
13638/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13639///
13640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13641#[inline]
13642#[target_feature(enable = "avx512f,avx512vl")]
13643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13644#[cfg_attr(test, assert_instr(vpmovqd))]
13645pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13646 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13647}
13648
13649/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13650///
13651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13652#[inline]
13653#[target_feature(enable = "avx512f")]
13654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13655#[cfg_attr(test, assert_instr(vpmovqw))]
13656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13657pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13658 unsafe {
13659 let a: Simd = a.as_i64x8();
13660 transmute::<i16x8, _>(src:simd_cast(a))
13661 }
13662}
13663
13664/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovqw))]
13671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13672pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13673 unsafe {
13674 let convert: Simd = _mm512_cvtepi64_epi16(a).as_i16x8();
13675 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13676 }
13677}
13678
13679/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13680///
13681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13682#[inline]
13683#[target_feature(enable = "avx512f")]
13684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13685#[cfg_attr(test, assert_instr(vpmovqw))]
13686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13687pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13688 unsafe {
13689 let convert: Simd = _mm512_cvtepi64_epi16(a).as_i16x8();
13690 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13691 }
13692}
13693
13694/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13695///
13696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13697#[inline]
13698#[target_feature(enable = "avx512f,avx512vl")]
13699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13700#[cfg_attr(test, assert_instr(vpmovqw))]
13701pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13702 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13703}
13704
13705/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13706///
13707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13708#[inline]
13709#[target_feature(enable = "avx512f,avx512vl")]
13710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13711#[cfg_attr(test, assert_instr(vpmovqw))]
13712pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13713 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13714}
13715
13716/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13717///
13718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13719#[inline]
13720#[target_feature(enable = "avx512f,avx512vl")]
13721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13722#[cfg_attr(test, assert_instr(vpmovqw))]
13723pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13724 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13725}
13726
13727/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13728///
13729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13730#[inline]
13731#[target_feature(enable = "avx512f,avx512vl")]
13732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13733#[cfg_attr(test, assert_instr(vpmovqw))]
13734pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13735 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13736}
13737
13738/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13739///
13740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13741#[inline]
13742#[target_feature(enable = "avx512f,avx512vl")]
13743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13744#[cfg_attr(test, assert_instr(vpmovqw))]
13745pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13746 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13747}
13748
13749/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13750///
13751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13752#[inline]
13753#[target_feature(enable = "avx512f,avx512vl")]
13754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13755#[cfg_attr(test, assert_instr(vpmovqw))]
13756pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13757 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13758}
13759
13760/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13761///
13762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13763#[inline]
13764#[target_feature(enable = "avx512f")]
13765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13766#[cfg_attr(test, assert_instr(vpmovqb))]
13767pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13768 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13769}
13770
13771/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13772///
13773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13774#[inline]
13775#[target_feature(enable = "avx512f")]
13776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13777#[cfg_attr(test, assert_instr(vpmovqb))]
13778pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13779 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13780}
13781
13782/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13783///
13784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13785#[inline]
13786#[target_feature(enable = "avx512f")]
13787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13788#[cfg_attr(test, assert_instr(vpmovqb))]
13789pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13790 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13791}
13792
13793/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13794///
13795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13796#[inline]
13797#[target_feature(enable = "avx512f,avx512vl")]
13798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13799#[cfg_attr(test, assert_instr(vpmovqb))]
13800pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13801 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13802}
13803
13804/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13805///
13806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13807#[inline]
13808#[target_feature(enable = "avx512f,avx512vl")]
13809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13810#[cfg_attr(test, assert_instr(vpmovqb))]
13811pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13812 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13813}
13814
13815/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13816///
13817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13818#[inline]
13819#[target_feature(enable = "avx512f,avx512vl")]
13820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13821#[cfg_attr(test, assert_instr(vpmovqb))]
13822pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13823 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13824}
13825
13826/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13827///
13828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13829#[inline]
13830#[target_feature(enable = "avx512f,avx512vl")]
13831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13832#[cfg_attr(test, assert_instr(vpmovqb))]
13833pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13834 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13835}
13836
13837/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13838///
13839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13840#[inline]
13841#[target_feature(enable = "avx512f,avx512vl")]
13842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13843#[cfg_attr(test, assert_instr(vpmovqb))]
13844pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13845 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13846}
13847
13848/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13849///
13850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13851#[inline]
13852#[target_feature(enable = "avx512f,avx512vl")]
13853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13854#[cfg_attr(test, assert_instr(vpmovqb))]
13855pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13856 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13857}
13858
13859/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13860///
13861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13862#[inline]
13863#[target_feature(enable = "avx512f")]
13864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13865#[cfg_attr(test, assert_instr(vpmovsdw))]
13866pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13867 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:0b11111111_11111111)) }
13868}
13869
13870/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13871///
13872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13873#[inline]
13874#[target_feature(enable = "avx512f")]
13875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13876#[cfg_attr(test, assert_instr(vpmovsdw))]
13877pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13878 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src.as_i16x16(), mask:k)) }
13879}
13880
13881/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13882///
13883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13884#[inline]
13885#[target_feature(enable = "avx512f")]
13886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13887#[cfg_attr(test, assert_instr(vpmovsdw))]
13888pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13889 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:k)) }
13890}
13891
13892/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13893///
13894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13895#[inline]
13896#[target_feature(enable = "avx512f,avx512vl")]
13897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13898#[cfg_attr(test, assert_instr(vpmovsdw))]
13899pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13900 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:0b11111111)) }
13901}
13902
13903/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13904///
13905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13906#[inline]
13907#[target_feature(enable = "avx512f,avx512vl")]
13908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13909#[cfg_attr(test, assert_instr(vpmovsdw))]
13910pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13911 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src.as_i16x8(), mask:k)) }
13912}
13913
13914/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13915///
13916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13917#[inline]
13918#[target_feature(enable = "avx512f,avx512vl")]
13919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13920#[cfg_attr(test, assert_instr(vpmovsdw))]
13921pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13922 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:k)) }
13923}
13924
13925/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13926///
13927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13928#[inline]
13929#[target_feature(enable = "avx512f,avx512vl")]
13930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13931#[cfg_attr(test, assert_instr(vpmovsdw))]
13932pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13933 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13934}
13935
13936/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13937///
13938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13939#[inline]
13940#[target_feature(enable = "avx512f,avx512vl")]
13941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13942#[cfg_attr(test, assert_instr(vpmovsdw))]
13943pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13944 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13945}
13946
13947/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13948///
13949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13950#[inline]
13951#[target_feature(enable = "avx512f,avx512vl")]
13952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13953#[cfg_attr(test, assert_instr(vpmovsdw))]
13954pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13955 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13956}
13957
13958/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13959///
13960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13961#[inline]
13962#[target_feature(enable = "avx512f")]
13963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13964#[cfg_attr(test, assert_instr(vpmovsdb))]
13965pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13966 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:0b11111111_11111111)) }
13967}
13968
13969/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13970///
13971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13972#[inline]
13973#[target_feature(enable = "avx512f")]
13974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13975#[cfg_attr(test, assert_instr(vpmovsdb))]
13976pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13977 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src.as_i8x16(), mask:k)) }
13978}
13979
13980/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13981///
13982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13983#[inline]
13984#[target_feature(enable = "avx512f")]
13985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13986#[cfg_attr(test, assert_instr(vpmovsdb))]
13987pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13988 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:k)) }
13989}
13990
13991/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13992///
13993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13994#[inline]
13995#[target_feature(enable = "avx512f,avx512vl")]
13996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13997#[cfg_attr(test, assert_instr(vpmovsdb))]
13998pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13999 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
14000}
14001
14002/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14003///
14004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
14005#[inline]
14006#[target_feature(enable = "avx512f,avx512vl")]
14007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14008#[cfg_attr(test, assert_instr(vpmovsdb))]
14009pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14010 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
14011}
14012
14013/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14014///
14015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
14016#[inline]
14017#[target_feature(enable = "avx512f,avx512vl")]
14018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14019#[cfg_attr(test, assert_instr(vpmovsdb))]
14020pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14021 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
14022}
14023
14024/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14025///
14026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
14027#[inline]
14028#[target_feature(enable = "avx512f,avx512vl")]
14029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14030#[cfg_attr(test, assert_instr(vpmovsdb))]
14031pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
14032 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
14033}
14034
14035/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14036///
14037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
14038#[inline]
14039#[target_feature(enable = "avx512f,avx512vl")]
14040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14041#[cfg_attr(test, assert_instr(vpmovsdb))]
14042pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14043 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
14044}
14045
14046/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14047///
14048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
14049#[inline]
14050#[target_feature(enable = "avx512f,avx512vl")]
14051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14052#[cfg_attr(test, assert_instr(vpmovsdb))]
14053pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14054 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
14055}
14056
14057/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14058///
14059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
14060#[inline]
14061#[target_feature(enable = "avx512f")]
14062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14063#[cfg_attr(test, assert_instr(vpmovsqd))]
14064pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
14065 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:0b11111111)) }
14066}
14067
14068/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14069///
14070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
14071#[inline]
14072#[target_feature(enable = "avx512f")]
14073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14074#[cfg_attr(test, assert_instr(vpmovsqd))]
14075pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14076 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src.as_i32x8(), mask:k)) }
14077}
14078
14079/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14080///
14081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
14082#[inline]
14083#[target_feature(enable = "avx512f")]
14084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14085#[cfg_attr(test, assert_instr(vpmovsqd))]
14086pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14087 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:k)) }
14088}
14089
14090/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14091///
14092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
14093#[inline]
14094#[target_feature(enable = "avx512f,avx512vl")]
14095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14096#[cfg_attr(test, assert_instr(vpmovsqd))]
14097pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
14098 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:0b11111111)) }
14099}
14100
14101/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14102///
14103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
14104#[inline]
14105#[target_feature(enable = "avx512f,avx512vl")]
14106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14107#[cfg_attr(test, assert_instr(vpmovsqd))]
14108pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14109 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src.as_i32x4(), mask:k)) }
14110}
14111
14112/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14113///
14114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
14115#[inline]
14116#[target_feature(enable = "avx512f,avx512vl")]
14117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14118#[cfg_attr(test, assert_instr(vpmovsqd))]
14119pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14120 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:k)) }
14121}
14122
14123/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14124///
14125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
14126#[inline]
14127#[target_feature(enable = "avx512f,avx512vl")]
14128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14129#[cfg_attr(test, assert_instr(vpmovsqd))]
14130pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
14131 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
14132}
14133
14134/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14135///
14136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
14137#[inline]
14138#[target_feature(enable = "avx512f,avx512vl")]
14139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14140#[cfg_attr(test, assert_instr(vpmovsqd))]
14141pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14142 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
14143}
14144
14145/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14146///
14147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
14148#[inline]
14149#[target_feature(enable = "avx512f,avx512vl")]
14150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14151#[cfg_attr(test, assert_instr(vpmovsqd))]
14152pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14153 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
14154}
14155
14156/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14157///
14158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
14159#[inline]
14160#[target_feature(enable = "avx512f")]
14161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14162#[cfg_attr(test, assert_instr(vpmovsqw))]
14163pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
14164 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:0b11111111)) }
14165}
14166
14167/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14168///
14169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
14170#[inline]
14171#[target_feature(enable = "avx512f")]
14172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14173#[cfg_attr(test, assert_instr(vpmovsqw))]
14174pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14175 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src.as_i16x8(), mask:k)) }
14176}
14177
14178/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14179///
14180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
14181#[inline]
14182#[target_feature(enable = "avx512f")]
14183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14184#[cfg_attr(test, assert_instr(vpmovsqw))]
14185pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14186 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:k)) }
14187}
14188
14189/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14190///
14191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
14192#[inline]
14193#[target_feature(enable = "avx512f,avx512vl")]
14194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14195#[cfg_attr(test, assert_instr(vpmovsqw))]
14196pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
14197 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
14198}
14199
14200/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14201///
14202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
14203#[inline]
14204#[target_feature(enable = "avx512f,avx512vl")]
14205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14206#[cfg_attr(test, assert_instr(vpmovsqw))]
14207pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14208 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
14209}
14210
14211/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14212///
14213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
14214#[inline]
14215#[target_feature(enable = "avx512f,avx512vl")]
14216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14217#[cfg_attr(test, assert_instr(vpmovsqw))]
14218pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14219 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
14220}
14221
14222/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14223///
14224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
14225#[inline]
14226#[target_feature(enable = "avx512f,avx512vl")]
14227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14228#[cfg_attr(test, assert_instr(vpmovsqw))]
14229pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
14230 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
14231}
14232
14233/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14234///
14235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
14236#[inline]
14237#[target_feature(enable = "avx512f,avx512vl")]
14238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14239#[cfg_attr(test, assert_instr(vpmovsqw))]
14240pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14241 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
14242}
14243
14244/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14245///
14246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
14247#[inline]
14248#[target_feature(enable = "avx512f,avx512vl")]
14249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14250#[cfg_attr(test, assert_instr(vpmovsqw))]
14251pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14252 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
14253}
14254
14255/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14256///
14257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
14258#[inline]
14259#[target_feature(enable = "avx512f")]
14260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14261#[cfg_attr(test, assert_instr(vpmovsqb))]
14262pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
14263 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
14264}
14265
14266/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14267///
14268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
14269#[inline]
14270#[target_feature(enable = "avx512f")]
14271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14272#[cfg_attr(test, assert_instr(vpmovsqb))]
14273pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14274 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
14275}
14276
14277/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14278///
14279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
14280#[inline]
14281#[target_feature(enable = "avx512f")]
14282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14283#[cfg_attr(test, assert_instr(vpmovsqb))]
14284pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14285 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
14286}
14287
14288/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14289///
14290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
14291#[inline]
14292#[target_feature(enable = "avx512f,avx512vl")]
14293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14294#[cfg_attr(test, assert_instr(vpmovsqb))]
14295pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
14296 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
14297}
14298
14299/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14300///
14301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
14302#[inline]
14303#[target_feature(enable = "avx512f,avx512vl")]
14304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14305#[cfg_attr(test, assert_instr(vpmovsqb))]
14306pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14307 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
14308}
14309
14310/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14311///
14312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
14313#[inline]
14314#[target_feature(enable = "avx512f,avx512vl")]
14315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14316#[cfg_attr(test, assert_instr(vpmovsqb))]
14317pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14318 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
14319}
14320
14321/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14322///
14323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
14324#[inline]
14325#[target_feature(enable = "avx512f,avx512vl")]
14326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14327#[cfg_attr(test, assert_instr(vpmovsqb))]
14328pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
14329 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
14330}
14331
14332/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14333///
14334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
14335#[inline]
14336#[target_feature(enable = "avx512f,avx512vl")]
14337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14338#[cfg_attr(test, assert_instr(vpmovsqb))]
14339pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14340 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
14341}
14342
14343/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14344///
14345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
14346#[inline]
14347#[target_feature(enable = "avx512f,avx512vl")]
14348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14349#[cfg_attr(test, assert_instr(vpmovsqb))]
14350pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14351 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
14352}
14353
14354/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14355///
14356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
14357#[inline]
14358#[target_feature(enable = "avx512f")]
14359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14360#[cfg_attr(test, assert_instr(vpmovusdw))]
14361pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
14362 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:0b11111111_11111111)) }
14363}
14364
14365/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14366///
14367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
14368#[inline]
14369#[target_feature(enable = "avx512f")]
14370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14371#[cfg_attr(test, assert_instr(vpmovusdw))]
14372pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
14373 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src.as_u16x16(), mask:k)) }
14374}
14375
14376/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14377///
14378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
14379#[inline]
14380#[target_feature(enable = "avx512f")]
14381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14382#[cfg_attr(test, assert_instr(vpmovusdw))]
14383pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
14384 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:k)) }
14385}
14386
14387/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14388///
14389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
14390#[inline]
14391#[target_feature(enable = "avx512f,avx512vl")]
14392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14393#[cfg_attr(test, assert_instr(vpmovusdw))]
14394pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14395 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:0b11111111)) }
14396}
14397
14398/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14399///
14400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14401#[inline]
14402#[target_feature(enable = "avx512f,avx512vl")]
14403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14404#[cfg_attr(test, assert_instr(vpmovusdw))]
14405pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14406 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src.as_u16x8(), mask:k)) }
14407}
14408
14409/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14410///
14411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14412#[inline]
14413#[target_feature(enable = "avx512f,avx512vl")]
14414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14415#[cfg_attr(test, assert_instr(vpmovusdw))]
14416pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14417 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:k)) }
14418}
14419
14420/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14421///
14422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14423#[inline]
14424#[target_feature(enable = "avx512f,avx512vl")]
14425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14426#[cfg_attr(test, assert_instr(vpmovusdw))]
14427pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14428 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src:u16x8::ZERO, mask:0b11111111)) }
14429}
14430
14431/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432///
14433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14434#[inline]
14435#[target_feature(enable = "avx512f,avx512vl")]
14436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14437#[cfg_attr(test, assert_instr(vpmovusdw))]
14438pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14439 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src.as_u16x8(), mask:k)) }
14440}
14441
14442/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443///
14444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14445#[inline]
14446#[target_feature(enable = "avx512f,avx512vl")]
14447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14448#[cfg_attr(test, assert_instr(vpmovusdw))]
14449pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14450 unsafe { transmute(src:vpmovusdw128(a.as_u32x4(), src:u16x8::ZERO, mask:k)) }
14451}
14452
14453/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14459#[cfg_attr(test, assert_instr(vpmovusdb))]
14460pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14461 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:0b11111111_11111111)) }
14462}
14463
14464/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465///
14466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14467#[inline]
14468#[target_feature(enable = "avx512f")]
14469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14470#[cfg_attr(test, assert_instr(vpmovusdb))]
14471pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14472 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src.as_u8x16(), mask:k)) }
14473}
14474
14475/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14476///
14477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14478#[inline]
14479#[target_feature(enable = "avx512f")]
14480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14481#[cfg_attr(test, assert_instr(vpmovusdb))]
14482pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14483 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:k)) }
14484}
14485
14486/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14487///
14488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14489#[inline]
14490#[target_feature(enable = "avx512f,avx512vl")]
14491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14492#[cfg_attr(test, assert_instr(vpmovusdb))]
14493pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14494 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src:u8x16::ZERO, mask:0b11111111)) }
14495}
14496
14497/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14498///
14499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14500#[inline]
14501#[target_feature(enable = "avx512f,avx512vl")]
14502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14503#[cfg_attr(test, assert_instr(vpmovusdb))]
14504pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14505 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src.as_u8x16(), mask:k)) }
14506}
14507
14508/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14509///
14510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14511#[inline]
14512#[target_feature(enable = "avx512f,avx512vl")]
14513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14514#[cfg_attr(test, assert_instr(vpmovusdb))]
14515pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14516 unsafe { transmute(src:vpmovusdb256(a.as_u32x8(), src:u8x16::ZERO, mask:k)) }
14517}
14518
14519/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14520///
14521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14522#[inline]
14523#[target_feature(enable = "avx512f,avx512vl")]
14524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14525#[cfg_attr(test, assert_instr(vpmovusdb))]
14526pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14527 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src:u8x16::ZERO, mask:0b11111111)) }
14528}
14529
14530/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14531///
14532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14533#[inline]
14534#[target_feature(enable = "avx512f,avx512vl")]
14535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14536#[cfg_attr(test, assert_instr(vpmovusdb))]
14537pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14538 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src.as_u8x16(), mask:k)) }
14539}
14540
14541/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14542///
14543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14544#[inline]
14545#[target_feature(enable = "avx512f,avx512vl")]
14546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14547#[cfg_attr(test, assert_instr(vpmovusdb))]
14548pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14549 unsafe { transmute(src:vpmovusdb128(a.as_u32x4(), src:u8x16::ZERO, mask:k)) }
14550}
14551
14552/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14553///
14554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14555#[inline]
14556#[target_feature(enable = "avx512f")]
14557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14558#[cfg_attr(test, assert_instr(vpmovusqd))]
14559pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14560 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src:u32x8::ZERO, mask:0b11111111)) }
14561}
14562
14563/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14564///
14565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14566#[inline]
14567#[target_feature(enable = "avx512f")]
14568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14569#[cfg_attr(test, assert_instr(vpmovusqd))]
14570pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14571 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src.as_u32x8(), mask:k)) }
14572}
14573
14574/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14575///
14576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14577#[inline]
14578#[target_feature(enable = "avx512f")]
14579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14580#[cfg_attr(test, assert_instr(vpmovusqd))]
14581pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14582 unsafe { transmute(src:vpmovusqd(a.as_u64x8(), src:u32x8::ZERO, mask:k)) }
14583}
14584
14585/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14586///
14587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14588#[inline]
14589#[target_feature(enable = "avx512f,avx512vl")]
14590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14591#[cfg_attr(test, assert_instr(vpmovusqd))]
14592pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14593 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src:u32x4::ZERO, mask:0b11111111)) }
14594}
14595
14596/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14599#[inline]
14600#[target_feature(enable = "avx512f,avx512vl")]
14601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14602#[cfg_attr(test, assert_instr(vpmovusqd))]
14603pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14604 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src.as_u32x4(), mask:k)) }
14605}
14606
14607/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14608///
14609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14610#[inline]
14611#[target_feature(enable = "avx512f,avx512vl")]
14612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14613#[cfg_attr(test, assert_instr(vpmovusqd))]
14614pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14615 unsafe { transmute(src:vpmovusqd256(a.as_u64x4(), src:u32x4::ZERO, mask:k)) }
14616}
14617
14618/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14619///
14620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14621#[inline]
14622#[target_feature(enable = "avx512f,avx512vl")]
14623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14624#[cfg_attr(test, assert_instr(vpmovusqd))]
14625pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14626 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src:u32x4::ZERO, mask:0b11111111)) }
14627}
14628
14629/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14632#[inline]
14633#[target_feature(enable = "avx512f,avx512vl")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vpmovusqd))]
14636pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14637 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src.as_u32x4(), mask:k)) }
14638}
14639
14640/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14641///
14642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14643#[inline]
14644#[target_feature(enable = "avx512f,avx512vl")]
14645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14646#[cfg_attr(test, assert_instr(vpmovusqd))]
14647pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14648 unsafe { transmute(src:vpmovusqd128(a.as_u64x2(), src:u32x4::ZERO, mask:k)) }
14649}
14650
14651/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14652///
14653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14654#[inline]
14655#[target_feature(enable = "avx512f")]
14656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14657#[cfg_attr(test, assert_instr(vpmovusqw))]
14658pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14659 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src:u16x8::ZERO, mask:0b11111111)) }
14660}
14661
14662/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14663///
14664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14665#[inline]
14666#[target_feature(enable = "avx512f")]
14667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14668#[cfg_attr(test, assert_instr(vpmovusqw))]
14669pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14670 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src.as_u16x8(), mask:k)) }
14671}
14672
14673/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14674///
14675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14676#[inline]
14677#[target_feature(enable = "avx512f")]
14678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14679#[cfg_attr(test, assert_instr(vpmovusqw))]
14680pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14681 unsafe { transmute(src:vpmovusqw(a.as_u64x8(), src:u16x8::ZERO, mask:k)) }
14682}
14683
14684/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14685///
14686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14687#[inline]
14688#[target_feature(enable = "avx512f,avx512vl")]
14689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14690#[cfg_attr(test, assert_instr(vpmovusqw))]
14691pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14692 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src:u16x8::ZERO, mask:0b11111111)) }
14693}
14694
14695/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14696///
14697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14698#[inline]
14699#[target_feature(enable = "avx512f,avx512vl")]
14700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14701#[cfg_attr(test, assert_instr(vpmovusqw))]
14702pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14703 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src.as_u16x8(), mask:k)) }
14704}
14705
14706/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14707///
14708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14709#[inline]
14710#[target_feature(enable = "avx512f,avx512vl")]
14711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14712#[cfg_attr(test, assert_instr(vpmovusqw))]
14713pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14714 unsafe { transmute(src:vpmovusqw256(a.as_u64x4(), src:u16x8::ZERO, mask:k)) }
14715}
14716
14717/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14718///
14719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14720#[inline]
14721#[target_feature(enable = "avx512f,avx512vl")]
14722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14723#[cfg_attr(test, assert_instr(vpmovusqw))]
14724pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14725 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src:u16x8::ZERO, mask:0b11111111)) }
14726}
14727
14728/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14729///
14730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14731#[inline]
14732#[target_feature(enable = "avx512f,avx512vl")]
14733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14734#[cfg_attr(test, assert_instr(vpmovusqw))]
14735pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14736 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src.as_u16x8(), mask:k)) }
14737}
14738
14739/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14740///
14741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14742#[inline]
14743#[target_feature(enable = "avx512f,avx512vl")]
14744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14745#[cfg_attr(test, assert_instr(vpmovusqw))]
14746pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14747 unsafe { transmute(src:vpmovusqw128(a.as_u64x2(), src:u16x8::ZERO, mask:k)) }
14748}
14749
14750/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14751///
14752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14753#[inline]
14754#[target_feature(enable = "avx512f")]
14755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14756#[cfg_attr(test, assert_instr(vpmovusqb))]
14757pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14758 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src:u8x16::ZERO, mask:0b11111111)) }
14759}
14760
14761/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14762///
14763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14764#[inline]
14765#[target_feature(enable = "avx512f")]
14766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14767#[cfg_attr(test, assert_instr(vpmovusqb))]
14768pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14769 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src.as_u8x16(), mask:k)) }
14770}
14771
14772/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14773///
14774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14775#[inline]
14776#[target_feature(enable = "avx512f")]
14777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14778#[cfg_attr(test, assert_instr(vpmovusqb))]
14779pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14780 unsafe { transmute(src:vpmovusqb(a.as_u64x8(), src:u8x16::ZERO, mask:k)) }
14781}
14782
14783/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14784///
14785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14786#[inline]
14787#[target_feature(enable = "avx512f,avx512vl")]
14788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14789#[cfg_attr(test, assert_instr(vpmovusqb))]
14790pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14791 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src:u8x16::ZERO, mask:0b11111111)) }
14792}
14793
14794/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14795///
14796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14797#[inline]
14798#[target_feature(enable = "avx512f,avx512vl")]
14799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14800#[cfg_attr(test, assert_instr(vpmovusqb))]
14801pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14802 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src.as_u8x16(), mask:k)) }
14803}
14804
14805/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14806///
14807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14808#[inline]
14809#[target_feature(enable = "avx512f,avx512vl")]
14810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14811#[cfg_attr(test, assert_instr(vpmovusqb))]
14812pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14813 unsafe { transmute(src:vpmovusqb256(a.as_u64x4(), src:u8x16::ZERO, mask:k)) }
14814}
14815
14816/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14817///
14818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14819#[inline]
14820#[target_feature(enable = "avx512f,avx512vl")]
14821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14822#[cfg_attr(test, assert_instr(vpmovusqb))]
14823pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14824 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src:u8x16::ZERO, mask:0b11111111)) }
14825}
14826
14827/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14828///
14829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14830#[inline]
14831#[target_feature(enable = "avx512f,avx512vl")]
14832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14833#[cfg_attr(test, assert_instr(vpmovusqb))]
14834pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14835 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src.as_u8x16(), mask:k)) }
14836}
14837
14838/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14839///
14840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14841#[inline]
14842#[target_feature(enable = "avx512f,avx512vl")]
14843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14844#[cfg_attr(test, assert_instr(vpmovusqb))]
14845pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14846 unsafe { transmute(src:vpmovusqb128(a.as_u64x2(), src:u8x16::ZERO, mask:k)) }
14847}
14848
14849/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14850///
14851/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14852/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14853/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14854/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14855/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14856/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14857///
14858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14859#[inline]
14860#[target_feature(enable = "avx512f")]
14861#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14862#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14863#[rustc_legacy_const_generics(1)]
14864pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14865 unsafe {
14866 static_assert_rounding!(ROUNDING);
14867 let a: Simd = a.as_f32x16();
14868 let r: Simd = vcvtps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
14869 transmute(src:r)
14870 }
14871}
14872
14873/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14874///
14875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14881///
14882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14883#[inline]
14884#[target_feature(enable = "avx512f")]
14885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14886#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14887#[rustc_legacy_const_generics(3)]
14888pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14889 src: __m512i,
14890 k: __mmask16,
14891 a: __m512,
14892) -> __m512i {
14893 unsafe {
14894 static_assert_rounding!(ROUNDING);
14895 let a: Simd = a.as_f32x16();
14896 let src: Simd = src.as_i32x16();
14897 let r: Simd = vcvtps2dq(a, src, mask:k, ROUNDING);
14898 transmute(src:r)
14899 }
14900}
14901
14902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14903///
14904/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14905/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14906/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14907/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14908/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14909/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14910///
14911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14912#[inline]
14913#[target_feature(enable = "avx512f")]
14914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14915#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14916#[rustc_legacy_const_generics(2)]
14917pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14918 unsafe {
14919 static_assert_rounding!(ROUNDING);
14920 let a: Simd = a.as_f32x16();
14921 let r: Simd = vcvtps2dq(a, src:i32x16::ZERO, mask:k, ROUNDING);
14922 transmute(src:r)
14923 }
14924}
14925
14926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14927///
14928/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14929/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14930/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14931/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14932/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14933/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14934///
14935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14936#[inline]
14937#[target_feature(enable = "avx512f")]
14938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14939#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14940#[rustc_legacy_const_generics(1)]
14941pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14942 unsafe {
14943 static_assert_rounding!(ROUNDING);
14944 let a: Simd = a.as_f32x16();
14945 let r: Simd = vcvtps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, ROUNDING);
14946 transmute(src:r)
14947 }
14948}
14949
14950/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14951///
14952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14958///
14959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14960#[inline]
14961#[target_feature(enable = "avx512f")]
14962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14963#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14964#[rustc_legacy_const_generics(3)]
14965pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14966 src: __m512i,
14967 k: __mmask16,
14968 a: __m512,
14969) -> __m512i {
14970 unsafe {
14971 static_assert_rounding!(ROUNDING);
14972 let a: Simd = a.as_f32x16();
14973 let src: Simd = src.as_u32x16();
14974 let r: Simd = vcvtps2udq(a, src, mask:k, ROUNDING);
14975 transmute(src:r)
14976 }
14977}
14978
14979/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14980///
14981/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14982/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14983/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14984/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14985/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14986/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14987///
14988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14989#[inline]
14990#[target_feature(enable = "avx512f")]
14991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14992#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14993#[rustc_legacy_const_generics(2)]
14994pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14995 unsafe {
14996 static_assert_rounding!(ROUNDING);
14997 let a: Simd = a.as_f32x16();
14998 let r: Simd = vcvtps2udq(a, src:u32x16::ZERO, mask:k, ROUNDING);
14999 transmute(src:r)
15000 }
15001}
15002
15003/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
15004/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15005///
15006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
15007#[inline]
15008#[target_feature(enable = "avx512f")]
15009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15010#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15011#[rustc_legacy_const_generics(1)]
15012pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
15013 unsafe {
15014 static_assert_sae!(SAE);
15015 let a: Simd = a.as_f32x8();
15016 let r: Simd = vcvtps2pd(a, src:f64x8::ZERO, mask:0b11111111, SAE);
15017 transmute(src:r)
15018 }
15019}
15020
15021/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15023///
15024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
15025#[inline]
15026#[target_feature(enable = "avx512f")]
15027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15028#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15029#[rustc_legacy_const_generics(3)]
15030pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
15031 unsafe {
15032 static_assert_sae!(SAE);
15033 let a: Simd = a.as_f32x8();
15034 let src: Simd = src.as_f64x8();
15035 let r: Simd = vcvtps2pd(a, src, mask:k, SAE);
15036 transmute(src:r)
15037 }
15038}
15039
15040/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15041/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15042///
15043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
15044#[inline]
15045#[target_feature(enable = "avx512f")]
15046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15047#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
15048#[rustc_legacy_const_generics(2)]
15049pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
15050 unsafe {
15051 static_assert_sae!(SAE);
15052 let a: Simd = a.as_f32x8();
15053 let r: Simd = vcvtps2pd(a, src:f64x8::ZERO, mask:k, SAE);
15054 transmute(src:r)
15055 }
15056}
15057
15058/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
15059///
15060/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15061/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15062/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15063/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15064/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15065/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15066///
15067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
15068#[inline]
15069#[target_feature(enable = "avx512f")]
15070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15071#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15072#[rustc_legacy_const_generics(1)]
15073pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15074 unsafe {
15075 static_assert_rounding!(ROUNDING);
15076 let a: Simd = a.as_f64x8();
15077 let r: Simd = vcvtpd2dq(a, src:i32x8::ZERO, mask:0b11111111, ROUNDING);
15078 transmute(src:r)
15079 }
15080}
15081
15082/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15083///
15084/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15085/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15086/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15087/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15088/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15089/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15090///
15091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
15092#[inline]
15093#[target_feature(enable = "avx512f")]
15094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15095#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15096#[rustc_legacy_const_generics(3)]
15097pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
15098 src: __m256i,
15099 k: __mmask8,
15100 a: __m512d,
15101) -> __m256i {
15102 unsafe {
15103 static_assert_rounding!(ROUNDING);
15104 let a: Simd = a.as_f64x8();
15105 let src: Simd = src.as_i32x8();
15106 let r: Simd = vcvtpd2dq(a, src, mask:k, ROUNDING);
15107 transmute(src:r)
15108 }
15109}
15110
15111/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15112///
15113/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15114/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15115/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15116/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15117/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15118/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15119///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
15121#[inline]
15122#[target_feature(enable = "avx512f")]
15123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15124#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
15125#[rustc_legacy_const_generics(2)]
15126pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15127 unsafe {
15128 static_assert_rounding!(ROUNDING);
15129 let a: Simd = a.as_f64x8();
15130 let r: Simd = vcvtpd2dq(a, src:i32x8::ZERO, mask:k, ROUNDING);
15131 transmute(src:r)
15132 }
15133}
15134
15135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
15136///
15137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15143///
15144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
15145#[inline]
15146#[target_feature(enable = "avx512f")]
15147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15148#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15149#[rustc_legacy_const_generics(1)]
15150pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
15151 unsafe {
15152 static_assert_rounding!(ROUNDING);
15153 let a: Simd = a.as_f64x8();
15154 let r: Simd = vcvtpd2udq(a, src:u32x8::ZERO, mask:0b11111111, ROUNDING);
15155 transmute(src:r)
15156 }
15157}
15158
15159/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15160///
15161/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15162/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15163/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15164/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15165/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15166/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15167///
15168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
15169#[inline]
15170#[target_feature(enable = "avx512f")]
15171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15172#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15173#[rustc_legacy_const_generics(3)]
15174pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
15175 src: __m256i,
15176 k: __mmask8,
15177 a: __m512d,
15178) -> __m256i {
15179 unsafe {
15180 static_assert_rounding!(ROUNDING);
15181 let a: Simd = a.as_f64x8();
15182 let src: Simd = src.as_u32x8();
15183 let r: Simd = vcvtpd2udq(a, src, mask:k, ROUNDING);
15184 transmute(src:r)
15185 }
15186}
15187
15188/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15189///
15190/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15191/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15192/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15193/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15194/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15196///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
15198#[inline]
15199#[target_feature(enable = "avx512f")]
15200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15201#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
15202#[rustc_legacy_const_generics(2)]
15203pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
15204 unsafe {
15205 static_assert_rounding!(ROUNDING);
15206 let a: Simd = a.as_f64x8();
15207 let r: Simd = vcvtpd2udq(a, src:u32x8::ZERO, mask:k, ROUNDING);
15208 transmute(src:r)
15209 }
15210}
15211
15212/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15213///
15214/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15215/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15216/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15217/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15218/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15219/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15220///
15221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
15222#[inline]
15223#[target_feature(enable = "avx512f")]
15224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15225#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15226#[rustc_legacy_const_generics(1)]
15227pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
15228 unsafe {
15229 static_assert_rounding!(ROUNDING);
15230 let a: Simd = a.as_f64x8();
15231 let r: Simd = vcvtpd2ps(a, src:f32x8::ZERO, mask:0b11111111, ROUNDING);
15232 transmute(src:r)
15233 }
15234}
15235
15236/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15237///
15238/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15239/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15240/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15241/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15242/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15243/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15249#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15250#[rustc_legacy_const_generics(3)]
15251pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
15252 src: __m256,
15253 k: __mmask8,
15254 a: __m512d,
15255) -> __m256 {
15256 unsafe {
15257 static_assert_rounding!(ROUNDING);
15258 let a: Simd = a.as_f64x8();
15259 let src: Simd = src.as_f32x8();
15260 let r: Simd = vcvtpd2ps(a, src, mask:k, ROUNDING);
15261 transmute(src:r)
15262 }
15263}
15264
15265/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15266///
15267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15273///
15274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
15275#[inline]
15276#[target_feature(enable = "avx512f")]
15277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15278#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
15279#[rustc_legacy_const_generics(2)]
15280pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
15281 unsafe {
15282 static_assert_rounding!(ROUNDING);
15283 let a: Simd = a.as_f64x8();
15284 let r: Simd = vcvtpd2ps(a, src:f32x8::ZERO, mask:k, ROUNDING);
15285 transmute(src:r)
15286 }
15287}
15288
15289/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15290///
15291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15297///
15298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
15299#[inline]
15300#[target_feature(enable = "avx512f")]
15301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15302#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15303#[rustc_legacy_const_generics(1)]
15304pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15305 unsafe {
15306 static_assert_rounding!(ROUNDING);
15307 let a: Simd = a.as_i32x16();
15308 let r: Simd = vcvtdq2ps(a, ROUNDING);
15309 transmute(src:r)
15310 }
15311}
15312
15313/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15314///
15315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15321///
15322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
15323#[inline]
15324#[target_feature(enable = "avx512f")]
15325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15326#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15327#[rustc_legacy_const_generics(3)]
15328pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
15329 src: __m512,
15330 k: __mmask16,
15331 a: __m512i,
15332) -> __m512 {
15333 unsafe {
15334 static_assert_rounding!(ROUNDING);
15335 let a: Simd = a.as_i32x16();
15336 let r: Simd = vcvtdq2ps(a, ROUNDING);
15337 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15338 }
15339}
15340
15341/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15342///
15343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15349///
15350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
15351#[inline]
15352#[target_feature(enable = "avx512f")]
15353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15354#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
15355#[rustc_legacy_const_generics(2)]
15356pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15357 unsafe {
15358 static_assert_rounding!(ROUNDING);
15359 let a: Simd = a.as_i32x16();
15360 let r: Simd = vcvtdq2ps(a, ROUNDING);
15361 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15362 }
15363}
15364
15365/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15366///
15367/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15373///
15374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
15375#[inline]
15376#[target_feature(enable = "avx512f")]
15377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15378#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15379#[rustc_legacy_const_generics(1)]
15380pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
15381 unsafe {
15382 static_assert_rounding!(ROUNDING);
15383 let a: Simd = a.as_u32x16();
15384 let r: Simd = vcvtudq2ps(a, ROUNDING);
15385 transmute(src:r)
15386 }
15387}
15388
15389/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15390///
15391/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15392/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15393/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15394/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15395/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15396/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15397///
15398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15399#[inline]
15400#[target_feature(enable = "avx512f")]
15401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15402#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15403#[rustc_legacy_const_generics(3)]
15404pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15405 src: __m512,
15406 k: __mmask16,
15407 a: __m512i,
15408) -> __m512 {
15409 unsafe {
15410 static_assert_rounding!(ROUNDING);
15411 let a: Simd = a.as_u32x16();
15412 let r: Simd = vcvtudq2ps(a, ROUNDING);
15413 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15414 }
15415}
15416
15417/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15418///
15419/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15420/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15421/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15422/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15423/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15424/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15425///
15426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15427#[inline]
15428#[target_feature(enable = "avx512f")]
15429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15430#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15431#[rustc_legacy_const_generics(2)]
15432pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15433 unsafe {
15434 static_assert_rounding!(ROUNDING);
15435 let a: Simd = a.as_u32x16();
15436 let r: Simd = vcvtudq2ps(a, ROUNDING);
15437 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15438 }
15439}
15440
15441/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15443/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15444/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15445/// * [`_MM_FROUND_TO_POS_INF`] // round up
15446/// * [`_MM_FROUND_TO_ZERO`] // truncate
15447/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15452/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15459#[rustc_legacy_const_generics(1)]
15460pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15461 unsafe {
15462 static_assert_extended_rounding!(ROUNDING);
15463 let a: Simd = a.as_f32x16();
15464 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:0b11111111_11111111);
15465 transmute(src:r)
15466 }
15467}
15468
15469/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15471/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15472/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15473/// * [`_MM_FROUND_TO_POS_INF`] // round up
15474/// * [`_MM_FROUND_TO_ZERO`] // truncate
15475/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15480/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15481///
15482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15483#[inline]
15484#[target_feature(enable = "avx512f")]
15485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15486#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15487#[rustc_legacy_const_generics(3)]
15488pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15489 src: __m256i,
15490 k: __mmask16,
15491 a: __m512,
15492) -> __m256i {
15493 unsafe {
15494 static_assert_extended_rounding!(ROUNDING);
15495 let a: Simd = a.as_f32x16();
15496 let src: Simd = src.as_i16x16();
15497 let r: Simd = vcvtps2ph(a, ROUNDING, src, mask:k);
15498 transmute(src:r)
15499 }
15500}
15501
15502/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15504/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15505/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15506/// * [`_MM_FROUND_TO_POS_INF`] // round up
15507/// * [`_MM_FROUND_TO_ZERO`] // truncate
15508/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15509/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15510/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15511/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15512/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15513/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15519#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15520#[rustc_legacy_const_generics(2)]
15521pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15522 unsafe {
15523 static_assert_extended_rounding!(ROUNDING);
15524 let a: Simd = a.as_f32x16();
15525 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:k);
15526 transmute(src:r)
15527 }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15532/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15533/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15534/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15535/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15536/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15537///
15538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15539#[inline]
15540#[target_feature(enable = "avx512f,avx512vl")]
15541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15542#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15543#[rustc_legacy_const_generics(3)]
15544pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15545 src: __m128i,
15546 k: __mmask8,
15547 a: __m256,
15548) -> __m128i {
15549 unsafe {
15550 static_assert_uimm_bits!(IMM8, 8);
15551 let a: Simd = a.as_f32x8();
15552 let src: Simd = src.as_i16x8();
15553 let r: Simd = vcvtps2ph256(a, IMM8, src, mask:k);
15554 transmute(src:r)
15555 }
15556}
15557
15558/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15559/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15565///
15566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15567#[inline]
15568#[target_feature(enable = "avx512f,avx512vl")]
15569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15570#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15571#[rustc_legacy_const_generics(2)]
15572pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15573 unsafe {
15574 static_assert_uimm_bits!(IMM8, 8);
15575 let a: Simd = a.as_f32x8();
15576 let r: Simd = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15577 transmute(src:r)
15578 }
15579}
15580
15581/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15582/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15583/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15584/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15585/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15586/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15587/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15588///
15589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15590#[inline]
15591#[target_feature(enable = "avx512f,avx512vl")]
15592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15593#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15594#[rustc_legacy_const_generics(3)]
15595pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15596 unsafe {
15597 static_assert_uimm_bits!(IMM8, 8);
15598 let a: Simd = a.as_f32x4();
15599 let src: Simd = src.as_i16x8();
15600 let r: Simd = vcvtps2ph128(a, IMM8, src, mask:k);
15601 transmute(src:r)
15602 }
15603}
15604
15605/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15606/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15607/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15608/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15609/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15610/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15611/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15612///
15613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15614#[inline]
15615#[target_feature(enable = "avx512f,avx512vl")]
15616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15617#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15618#[rustc_legacy_const_generics(2)]
15619pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15620 unsafe {
15621 static_assert_uimm_bits!(IMM8, 8);
15622 let a: Simd = a.as_f32x4();
15623 let r: Simd = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15624 transmute(src:r)
15625 }
15626}
15627
15628/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15629/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15630/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15631/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15632/// * [`_MM_FROUND_TO_POS_INF`] // round up
15633/// * [`_MM_FROUND_TO_ZERO`] // truncate
15634/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15635/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15636/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15637/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15638/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15639/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15640///
15641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15642#[inline]
15643#[target_feature(enable = "avx512f")]
15644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15645#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15646#[rustc_legacy_const_generics(1)]
15647pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15648 unsafe {
15649 static_assert_extended_rounding!(ROUNDING);
15650 let a: Simd = a.as_f32x16();
15651 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:0b11111111_11111111);
15652 transmute(src:r)
15653 }
15654}
15655
15656/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15658/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15659/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15660/// * [`_MM_FROUND_TO_POS_INF`] // round up
15661/// * [`_MM_FROUND_TO_ZERO`] // truncate
15662/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15663/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15664/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15665/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15666/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15667/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15668///
15669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15670#[inline]
15671#[target_feature(enable = "avx512f")]
15672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15673#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15674#[rustc_legacy_const_generics(3)]
15675pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15676 unsafe {
15677 static_assert_extended_rounding!(ROUNDING);
15678 let a: Simd = a.as_f32x16();
15679 let src: Simd = src.as_i16x16();
15680 let r: Simd = vcvtps2ph(a, ROUNDING, src, mask:k);
15681 transmute(src:r)
15682 }
15683}
15684
15685/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15687/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15688/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15689/// * [`_MM_FROUND_TO_POS_INF`] // round up
15690/// * [`_MM_FROUND_TO_ZERO`] // truncate
15691/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15696/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15697///
15698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15699#[inline]
15700#[target_feature(enable = "avx512f")]
15701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15702#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15703#[rustc_legacy_const_generics(2)]
15704pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15705 unsafe {
15706 static_assert_extended_rounding!(ROUNDING);
15707 let a: Simd = a.as_f32x16();
15708 let r: Simd = vcvtps2ph(a, ROUNDING, src:i16x16::ZERO, mask:k);
15709 transmute(src:r)
15710 }
15711}
15712
15713/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15714/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15715/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15716/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15717/// * [`_MM_FROUND_TO_POS_INF`] : round up
15718/// * [`_MM_FROUND_TO_ZERO`] : truncate
15719/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15720///
15721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15722#[inline]
15723#[target_feature(enable = "avx512f,avx512vl")]
15724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15725#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15726#[rustc_legacy_const_generics(3)]
15727pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15728 unsafe {
15729 static_assert_uimm_bits!(IMM8, 8);
15730 let a: Simd = a.as_f32x8();
15731 let src: Simd = src.as_i16x8();
15732 let r: Simd = vcvtps2ph256(a, IMM8, src, mask:k);
15733 transmute(src:r)
15734 }
15735}
15736
15737/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15738/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15739/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15740/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15741/// * [`_MM_FROUND_TO_POS_INF`] : round up
15742/// * [`_MM_FROUND_TO_ZERO`] : truncate
15743/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15744///
15745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15746#[inline]
15747#[target_feature(enable = "avx512f,avx512vl")]
15748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15749#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15750#[rustc_legacy_const_generics(2)]
15751pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15752 unsafe {
15753 static_assert_uimm_bits!(IMM8, 8);
15754 let a: Simd = a.as_f32x8();
15755 let r: Simd = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15756 transmute(src:r)
15757 }
15758}
15759
15760/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15761/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15762/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15763/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15764/// * [`_MM_FROUND_TO_POS_INF`] : round up
15765/// * [`_MM_FROUND_TO_ZERO`] : truncate
15766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15769#[inline]
15770#[target_feature(enable = "avx512f,avx512vl")]
15771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15772#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15773#[rustc_legacy_const_generics(3)]
15774pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15775 unsafe {
15776 static_assert_uimm_bits!(IMM8, 8);
15777 let a: Simd = a.as_f32x4();
15778 let src: Simd = src.as_i16x8();
15779 let r: Simd = vcvtps2ph128(a, IMM8, src, mask:k);
15780 transmute(src:r)
15781 }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15785/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15786/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15787/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15788/// * [`_MM_FROUND_TO_POS_INF`] : round up
15789/// * [`_MM_FROUND_TO_ZERO`] : truncate
15790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15793#[inline]
15794#[target_feature(enable = "avx512f,avx512vl")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15797#[rustc_legacy_const_generics(2)]
15798pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15799 unsafe {
15800 static_assert_uimm_bits!(IMM8, 8);
15801 let a: Simd = a.as_f32x4();
15802 let r: Simd = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15803 transmute(src:r)
15804 }
15805}
15806
15807/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15815#[rustc_legacy_const_generics(1)]
15816pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15817 unsafe {
15818 static_assert_sae!(SAE);
15819 let a: Simd = a.as_i16x16();
15820 let r: Simd = vcvtph2ps(a, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
15821 transmute(src:r)
15822 }
15823}
15824
15825/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15826/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15833#[rustc_legacy_const_generics(3)]
15834pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15835 unsafe {
15836 static_assert_sae!(SAE);
15837 let a: Simd = a.as_i16x16();
15838 let src: Simd = src.as_f32x16();
15839 let r: Simd = vcvtph2ps(a, src, mask:k, SAE);
15840 transmute(src:r)
15841 }
15842}
15843
15844/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15846///
15847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15848#[inline]
15849#[target_feature(enable = "avx512f")]
15850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15851#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15852#[rustc_legacy_const_generics(2)]
15853pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15854 unsafe {
15855 static_assert_sae!(SAE);
15856 let a: Simd = a.as_i16x16();
15857 let r: Simd = vcvtph2ps(a, src:f32x16::ZERO, mask:k, SAE);
15858 transmute(src:r)
15859 }
15860}
15861
15862/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15863///
15864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15865#[inline]
15866#[target_feature(enable = "avx512f")]
15867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15868#[cfg_attr(test, assert_instr(vcvtph2ps))]
15869pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15870 unsafe {
15871 transmute(src:vcvtph2ps(
15872 a.as_i16x16(),
15873 src:f32x16::ZERO,
15874 mask:0b11111111_11111111,
15875 _MM_FROUND_NO_EXC,
15876 ))
15877 }
15878}
15879
15880/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15881///
15882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15883#[inline]
15884#[target_feature(enable = "avx512f")]
15885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15886#[cfg_attr(test, assert_instr(vcvtph2ps))]
15887pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15888 unsafe {
15889 transmute(src:vcvtph2ps(
15890 a.as_i16x16(),
15891 src.as_f32x16(),
15892 mask:k,
15893 _MM_FROUND_NO_EXC,
15894 ))
15895 }
15896}
15897
15898/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15899///
15900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15901#[inline]
15902#[target_feature(enable = "avx512f")]
15903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15904#[cfg_attr(test, assert_instr(vcvtph2ps))]
15905pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15906 unsafe { transmute(src:vcvtph2ps(a.as_i16x16(), src:f32x16::ZERO, mask:k, _MM_FROUND_NO_EXC)) }
15907}
15908
15909/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15910///
15911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15912#[inline]
15913#[target_feature(enable = "avx512f,avx512vl")]
15914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15915#[cfg_attr(test, assert_instr(vcvtph2ps))]
15916pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15917 unsafe {
15918 let convert: __m256 = _mm256_cvtph_ps(a);
15919 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:src.as_f32x8()))
15920 }
15921}
15922
15923/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15924///
15925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15926#[inline]
15927#[target_feature(enable = "avx512f,avx512vl")]
15928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15929#[cfg_attr(test, assert_instr(vcvtph2ps))]
15930pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15931 unsafe {
15932 let convert: __m256 = _mm256_cvtph_ps(a);
15933 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:f32x8::ZERO))
15934 }
15935}
15936
15937/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15943#[cfg_attr(test, assert_instr(vcvtph2ps))]
15944pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15945 unsafe {
15946 let convert: __m128 = _mm_cvtph_ps(a);
15947 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
15948 }
15949}
15950
15951/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15952///
15953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15954#[inline]
15955#[target_feature(enable = "avx512f,avx512vl")]
15956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15957#[cfg_attr(test, assert_instr(vcvtph2ps))]
15958pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15959 unsafe {
15960 let convert: __m128 = _mm_cvtph_ps(a);
15961 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
15962 }
15963}
15964
15965/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15972#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15973#[rustc_legacy_const_generics(1)]
15974pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15975 unsafe {
15976 static_assert_sae!(SAE);
15977 let a: Simd = a.as_f32x16();
15978 let r: Simd = vcvttps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, SAE);
15979 transmute(src:r)
15980 }
15981}
15982
15983/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15984/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15990#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15991#[rustc_legacy_const_generics(3)]
15992pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15993 src: __m512i,
15994 k: __mmask16,
15995 a: __m512,
15996) -> __m512i {
15997 unsafe {
15998 static_assert_sae!(SAE);
15999 let a: Simd = a.as_f32x16();
16000 let src: Simd = src.as_i32x16();
16001 let r: Simd = vcvttps2dq(a, src, mask:k, SAE);
16002 transmute(src:r)
16003 }
16004}
16005
16006/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16008///
16009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
16010#[inline]
16011#[target_feature(enable = "avx512f")]
16012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16013#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
16014#[rustc_legacy_const_generics(2)]
16015pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16016 unsafe {
16017 static_assert_sae!(SAE);
16018 let a: Simd = a.as_f32x16();
16019 let r: Simd = vcvttps2dq(a, src:i32x16::ZERO, mask:k, SAE);
16020 transmute(src:r)
16021 }
16022}
16023
16024/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16026///
16027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
16028#[inline]
16029#[target_feature(enable = "avx512f")]
16030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16031#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16032#[rustc_legacy_const_generics(1)]
16033pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
16034 unsafe {
16035 static_assert_sae!(SAE);
16036 let a: Simd = a.as_f32x16();
16037 let r: Simd = vcvttps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, SAE);
16038 transmute(src:r)
16039 }
16040}
16041
16042/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16043/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16044///
16045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
16046#[inline]
16047#[target_feature(enable = "avx512f")]
16048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16049#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16050#[rustc_legacy_const_generics(3)]
16051pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
16052 src: __m512i,
16053 k: __mmask16,
16054 a: __m512,
16055) -> __m512i {
16056 unsafe {
16057 static_assert_sae!(SAE);
16058 let a: Simd = a.as_f32x16();
16059 let src: Simd = src.as_u32x16();
16060 let r: Simd = vcvttps2udq(a, src, mask:k, SAE);
16061 transmute(src:r)
16062 }
16063}
16064
16065/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16066/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16067///
16068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
16069#[inline]
16070#[target_feature(enable = "avx512f")]
16071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16072#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
16073#[rustc_legacy_const_generics(2)]
16074pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
16075 unsafe {
16076 static_assert_sae!(SAE);
16077 let a: Simd = a.as_f32x16();
16078 let r: Simd = vcvttps2udq(a, src:u32x16::ZERO, mask:k, SAE);
16079 transmute(src:r)
16080 }
16081}
16082
16083/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
16084/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16085///
16086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
16087#[inline]
16088#[target_feature(enable = "avx512f")]
16089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16090#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16091#[rustc_legacy_const_generics(1)]
16092pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
16093 unsafe {
16094 static_assert_sae!(SAE);
16095 let a: Simd = a.as_f64x8();
16096 let r: Simd = vcvttpd2dq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
16097 transmute(src:r)
16098 }
16099}
16100
16101/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
16105#[inline]
16106#[target_feature(enable = "avx512f")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16109#[rustc_legacy_const_generics(3)]
16110pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
16111 src: __m256i,
16112 k: __mmask8,
16113 a: __m512d,
16114) -> __m256i {
16115 unsafe {
16116 static_assert_sae!(SAE);
16117 let a: Simd = a.as_f64x8();
16118 let src: Simd = src.as_i32x8();
16119 let r: Simd = vcvttpd2dq(a, src, mask:k, SAE);
16120 transmute(src:r)
16121 }
16122}
16123
16124/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16125/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16126///
16127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
16128#[inline]
16129#[target_feature(enable = "avx512f")]
16130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16131#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
16132#[rustc_legacy_const_generics(2)]
16133pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16134 unsafe {
16135 static_assert_sae!(SAE);
16136 let a: Simd = a.as_f64x8();
16137 let r: Simd = vcvttpd2dq(a, src:i32x8::ZERO, mask:k, SAE);
16138 transmute(src:r)
16139 }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
16143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16144///
16145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
16146#[inline]
16147#[target_feature(enable = "avx512f")]
16148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16149#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16150#[rustc_legacy_const_generics(1)]
16151pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
16152 unsafe {
16153 static_assert_sae!(SAE);
16154 let a: Simd = a.as_f64x8();
16155 let r: Simd = vcvttpd2udq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
16156 transmute(src:r)
16157 }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
16161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16162///
16163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
16164#[inline]
16165#[target_feature(enable = "avx512f")]
16166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16167#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16168#[rustc_legacy_const_generics(3)]
16169pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
16170 src: __m256i,
16171 k: __mmask8,
16172 a: __m512d,
16173) -> __m256i {
16174 unsafe {
16175 static_assert_sae!(SAE);
16176 let a: Simd = a.as_f64x8();
16177 let src: Simd = src.as_i32x8();
16178 let r: Simd = vcvttpd2udq(a, src, mask:k, SAE);
16179 transmute(src:r)
16180 }
16181}
16182
16183/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16184///
16185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
16186#[inline]
16187#[target_feature(enable = "avx512f")]
16188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16189#[cfg_attr(test, assert_instr(vcvttps2dq))]
16190pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
16191 unsafe {
16192 transmute(src:vcvttps2dq(
16193 a.as_f32x16(),
16194 src:i32x16::ZERO,
16195 mask:0b11111111_11111111,
16196 _MM_FROUND_CUR_DIRECTION,
16197 ))
16198 }
16199}
16200
16201/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16202///
16203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
16204#[inline]
16205#[target_feature(enable = "avx512f")]
16206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16207#[cfg_attr(test, assert_instr(vcvttps2dq))]
16208pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16209 unsafe {
16210 transmute(src:vcvttps2dq(
16211 a.as_f32x16(),
16212 src.as_i32x16(),
16213 mask:k,
16214 _MM_FROUND_CUR_DIRECTION,
16215 ))
16216 }
16217}
16218
16219/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16220///
16221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
16222#[inline]
16223#[target_feature(enable = "avx512f")]
16224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16225#[cfg_attr(test, assert_instr(vcvttps2dq))]
16226pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
16227 unsafe {
16228 transmute(src:vcvttps2dq(
16229 a.as_f32x16(),
16230 src:i32x16::ZERO,
16231 mask:k,
16232 _MM_FROUND_CUR_DIRECTION,
16233 ))
16234 }
16235}
16236
16237/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16238///
16239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
16240#[inline]
16241#[target_feature(enable = "avx512f,avx512vl")]
16242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16243#[cfg_attr(test, assert_instr(vcvttps2dq))]
16244pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16245 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), mask:k)) }
16246}
16247
16248/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16249///
16250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
16251#[inline]
16252#[target_feature(enable = "avx512f,avx512vl")]
16253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16254#[cfg_attr(test, assert_instr(vcvttps2dq))]
16255pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
16256 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src:i32x8::ZERO, mask:k)) }
16257}
16258
16259/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16260///
16261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
16262#[inline]
16263#[target_feature(enable = "avx512f,avx512vl")]
16264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16265#[cfg_attr(test, assert_instr(vcvttps2dq))]
16266pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16267 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), mask:k)) }
16268}
16269
16270/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16271///
16272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
16273#[inline]
16274#[target_feature(enable = "avx512f,avx512vl")]
16275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16276#[cfg_attr(test, assert_instr(vcvttps2dq))]
16277pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
16278 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src:i32x4::ZERO, mask:k)) }
16279}
16280
16281/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16282///
16283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
16284#[inline]
16285#[target_feature(enable = "avx512f")]
16286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16287#[cfg_attr(test, assert_instr(vcvttps2udq))]
16288pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
16289 unsafe {
16290 transmute(src:vcvttps2udq(
16291 a.as_f32x16(),
16292 src:u32x16::ZERO,
16293 mask:0b11111111_11111111,
16294 _MM_FROUND_CUR_DIRECTION,
16295 ))
16296 }
16297}
16298
16299/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16300///
16301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
16302#[inline]
16303#[target_feature(enable = "avx512f")]
16304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16305#[cfg_attr(test, assert_instr(vcvttps2udq))]
16306pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
16307 unsafe {
16308 transmute(src:vcvttps2udq(
16309 a.as_f32x16(),
16310 src.as_u32x16(),
16311 mask:k,
16312 _MM_FROUND_CUR_DIRECTION,
16313 ))
16314 }
16315}
16316
16317/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16318///
16319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
16320#[inline]
16321#[target_feature(enable = "avx512f")]
16322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16323#[cfg_attr(test, assert_instr(vcvttps2udq))]
16324pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
16325 unsafe {
16326 transmute(src:vcvttps2udq(
16327 a.as_f32x16(),
16328 src:u32x16::ZERO,
16329 mask:k,
16330 _MM_FROUND_CUR_DIRECTION,
16331 ))
16332 }
16333}
16334
16335/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16336///
16337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
16338#[inline]
16339#[target_feature(enable = "avx512f,avx512vl")]
16340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16341#[cfg_attr(test, assert_instr(vcvttps2udq))]
16342pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
16343 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
16344}
16345
16346/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16347///
16348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
16349#[inline]
16350#[target_feature(enable = "avx512f,avx512vl")]
16351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16352#[cfg_attr(test, assert_instr(vcvttps2udq))]
16353pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
16354 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
16355}
16356
16357/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16358///
16359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
16360#[inline]
16361#[target_feature(enable = "avx512f,avx512vl")]
16362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16363#[cfg_attr(test, assert_instr(vcvttps2udq))]
16364pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
16365 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
16366}
16367
16368/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16369///
16370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
16371#[inline]
16372#[target_feature(enable = "avx512f,avx512vl")]
16373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16374#[cfg_attr(test, assert_instr(vcvttps2udq))]
16375pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
16376 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
16377}
16378
16379/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16380///
16381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
16382#[inline]
16383#[target_feature(enable = "avx512f,avx512vl")]
16384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16385#[cfg_attr(test, assert_instr(vcvttps2udq))]
16386pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
16387 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
16388}
16389
16390/// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16391///
16392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16393#[inline]
16394#[target_feature(enable = "avx512f,avx512vl")]
16395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16396#[cfg_attr(test, assert_instr(vcvttps2udq))]
16397pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
16398 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
16399}
16400
16401/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16402/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16403///
16404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16405#[inline]
16406#[target_feature(enable = "avx512f")]
16407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16408#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16409#[rustc_legacy_const_generics(2)]
16410pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16411 unsafe {
16412 static_assert_sae!(SAE);
16413 let a: Simd = a.as_f64x8();
16414 let r: Simd = vcvttpd2udq(a, src:i32x8::ZERO, mask:k, SAE);
16415 transmute(src:r)
16416 }
16417}
16418
16419/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16425#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16426pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16427 unsafe {
16428 transmute(src:vcvttpd2dq(
16429 a.as_f64x8(),
16430 src:i32x8::ZERO,
16431 mask:0b11111111,
16432 _MM_FROUND_CUR_DIRECTION,
16433 ))
16434 }
16435}
16436
16437/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16438///
16439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16440#[inline]
16441#[target_feature(enable = "avx512f")]
16442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16443#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16444pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16445 unsafe {
16446 transmute(src:vcvttpd2dq(
16447 a.as_f64x8(),
16448 src.as_i32x8(),
16449 mask:k,
16450 _MM_FROUND_CUR_DIRECTION,
16451 ))
16452 }
16453}
16454
16455/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16456///
16457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16458#[inline]
16459#[target_feature(enable = "avx512f")]
16460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16461#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16462pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16463 unsafe {
16464 transmute(src:vcvttpd2dq(
16465 a.as_f64x8(),
16466 src:i32x8::ZERO,
16467 mask:k,
16468 _MM_FROUND_CUR_DIRECTION,
16469 ))
16470 }
16471}
16472
16473/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16474///
16475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16476#[inline]
16477#[target_feature(enable = "avx512f,avx512vl")]
16478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16479#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16480pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16481 unsafe { transmute(src:vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), mask:k)) }
16482}
16483
16484/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16485///
16486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16487#[inline]
16488#[target_feature(enable = "avx512f,avx512vl")]
16489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16490#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16491pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16492 unsafe { transmute(src:vcvttpd2dq256(a.as_f64x4(), src:i32x4::ZERO, mask:k)) }
16493}
16494
16495/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16496///
16497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16498#[inline]
16499#[target_feature(enable = "avx512f,avx512vl")]
16500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16501#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16502pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16503 unsafe { transmute(src:vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), mask:k)) }
16504}
16505
16506/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16507///
16508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16509#[inline]
16510#[target_feature(enable = "avx512f,avx512vl")]
16511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16512#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16513pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16514 unsafe { transmute(src:vcvttpd2dq128(a.as_f64x2(), src:i32x4::ZERO, mask:k)) }
16515}
16516
16517/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16518///
16519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16520#[inline]
16521#[target_feature(enable = "avx512f")]
16522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16523#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16524pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16525 unsafe {
16526 transmute(src:vcvttpd2udq(
16527 a.as_f64x8(),
16528 src:i32x8::ZERO,
16529 mask:0b11111111,
16530 _MM_FROUND_CUR_DIRECTION,
16531 ))
16532 }
16533}
16534
16535/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16536///
16537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16538#[inline]
16539#[target_feature(enable = "avx512f")]
16540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16541#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16542pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16543 unsafe {
16544 transmute(src:vcvttpd2udq(
16545 a.as_f64x8(),
16546 src.as_i32x8(),
16547 mask:k,
16548 _MM_FROUND_CUR_DIRECTION,
16549 ))
16550 }
16551}
16552
16553/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16554///
16555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16556#[inline]
16557#[target_feature(enable = "avx512f")]
16558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16559#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16560pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16561 unsafe {
16562 transmute(src:vcvttpd2udq(
16563 a.as_f64x8(),
16564 src:i32x8::ZERO,
16565 mask:k,
16566 _MM_FROUND_CUR_DIRECTION,
16567 ))
16568 }
16569}
16570
16571/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16572///
16573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16574#[inline]
16575#[target_feature(enable = "avx512f,avx512vl")]
16576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16577#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16578pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16579 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src:i32x4::ZERO, mask:0b11111111)) }
16580}
16581
16582/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16583///
16584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16585#[inline]
16586#[target_feature(enable = "avx512f,avx512vl")]
16587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16588#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16589pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16590 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), mask:k)) }
16591}
16592
16593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16594///
16595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16596#[inline]
16597#[target_feature(enable = "avx512f,avx512vl")]
16598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16599#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16600pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16601 unsafe { transmute(src:vcvttpd2udq256(a.as_f64x4(), src:i32x4::ZERO, mask:k)) }
16602}
16603
16604/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16605///
16606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16607#[inline]
16608#[target_feature(enable = "avx512f,avx512vl")]
16609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16610#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16611pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16612 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src:i32x4::ZERO, mask:0b11111111)) }
16613}
16614
16615/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16616///
16617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16618#[inline]
16619#[target_feature(enable = "avx512f,avx512vl")]
16620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16621#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16622pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16623 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), mask:k)) }
16624}
16625
16626/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16627///
16628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16629#[inline]
16630#[target_feature(enable = "avx512f,avx512vl")]
16631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16632#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16633pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16634 unsafe { transmute(src:vcvttpd2udq128(a.as_f64x2(), src:i32x4::ZERO, mask:k)) }
16635}
16636
16637/// Returns vector of type `__m512d` with all elements set to zero.
16638///
16639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16640#[inline]
16641#[target_feature(enable = "avx512f")]
16642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16643#[cfg_attr(test, assert_instr(vxorps))]
16644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16645pub const fn _mm512_setzero_pd() -> __m512d {
16646 // All-0 is a properly initialized __m512d
16647 unsafe { const { mem::zeroed() } }
16648}
16649
16650/// Returns vector of type `__m512` with all elements set to zero.
16651///
16652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16653#[inline]
16654#[target_feature(enable = "avx512f")]
16655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16656#[cfg_attr(test, assert_instr(vxorps))]
16657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16658pub const fn _mm512_setzero_ps() -> __m512 {
16659 // All-0 is a properly initialized __m512
16660 unsafe { const { mem::zeroed() } }
16661}
16662
16663/// Return vector of type `__m512` with all elements set to zero.
16664///
16665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16666#[inline]
16667#[target_feature(enable = "avx512f")]
16668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16669#[cfg_attr(test, assert_instr(vxorps))]
16670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16671pub const fn _mm512_setzero() -> __m512 {
16672 // All-0 is a properly initialized __m512
16673 unsafe { const { mem::zeroed() } }
16674}
16675
16676/// Returns vector of type `__m512i` with all elements set to zero.
16677///
16678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16679#[inline]
16680#[target_feature(enable = "avx512f")]
16681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16682#[cfg_attr(test, assert_instr(vxorps))]
16683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16684pub const fn _mm512_setzero_si512() -> __m512i {
16685 // All-0 is a properly initialized __m512i
16686 unsafe { const { mem::zeroed() } }
16687}
16688
16689/// Return vector of type `__m512i` with all elements set to zero.
16690///
16691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16692#[inline]
16693#[target_feature(enable = "avx512f")]
16694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16695#[cfg_attr(test, assert_instr(vxorps))]
16696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16697pub const fn _mm512_setzero_epi32() -> __m512i {
16698 // All-0 is a properly initialized __m512i
16699 unsafe { const { mem::zeroed() } }
16700}
16701
16702/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16703/// order.
16704///
16705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16706#[inline]
16707#[target_feature(enable = "avx512f")]
16708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16710pub const fn _mm512_setr_epi32(
16711 e15: i32,
16712 e14: i32,
16713 e13: i32,
16714 e12: i32,
16715 e11: i32,
16716 e10: i32,
16717 e9: i32,
16718 e8: i32,
16719 e7: i32,
16720 e6: i32,
16721 e5: i32,
16722 e4: i32,
16723 e3: i32,
16724 e2: i32,
16725 e1: i32,
16726 e0: i32,
16727) -> __m512i {
16728 unsafe {
16729 let r: Simd = i32x16::new(
16730 x0:e15, x1:e14, x2:e13, x3:e12, x4:e11, x5:e10, x6:e9, x7:e8, x8:e7, x9:e6, x10:e5, x11:e4, x12:e3, x13:e2, x14:e1, x15:e0,
16731 );
16732 transmute(src:r)
16733 }
16734}
16735
16736/// Set packed 8-bit integers in dst with the supplied values.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16743pub const fn _mm512_set_epi8(
16744 e63: i8,
16745 e62: i8,
16746 e61: i8,
16747 e60: i8,
16748 e59: i8,
16749 e58: i8,
16750 e57: i8,
16751 e56: i8,
16752 e55: i8,
16753 e54: i8,
16754 e53: i8,
16755 e52: i8,
16756 e51: i8,
16757 e50: i8,
16758 e49: i8,
16759 e48: i8,
16760 e47: i8,
16761 e46: i8,
16762 e45: i8,
16763 e44: i8,
16764 e43: i8,
16765 e42: i8,
16766 e41: i8,
16767 e40: i8,
16768 e39: i8,
16769 e38: i8,
16770 e37: i8,
16771 e36: i8,
16772 e35: i8,
16773 e34: i8,
16774 e33: i8,
16775 e32: i8,
16776 e31: i8,
16777 e30: i8,
16778 e29: i8,
16779 e28: i8,
16780 e27: i8,
16781 e26: i8,
16782 e25: i8,
16783 e24: i8,
16784 e23: i8,
16785 e22: i8,
16786 e21: i8,
16787 e20: i8,
16788 e19: i8,
16789 e18: i8,
16790 e17: i8,
16791 e16: i8,
16792 e15: i8,
16793 e14: i8,
16794 e13: i8,
16795 e12: i8,
16796 e11: i8,
16797 e10: i8,
16798 e9: i8,
16799 e8: i8,
16800 e7: i8,
16801 e6: i8,
16802 e5: i8,
16803 e4: i8,
16804 e3: i8,
16805 e2: i8,
16806 e1: i8,
16807 e0: i8,
16808) -> __m512i {
16809 unsafe {
16810 let r: Simd = i8x64::new(
16811 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18,
16812 x19:e19, x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31, x32:e32, x33:e33, x34:e34, x35:e35,
16813 x36:e36, x37:e37, x38:e38, x39:e39, x40:e40, x41:e41, x42:e42, x43:e43, x44:e44, x45:e45, x46:e46, x47:e47, x48:e48, x49:e49, x50:e50, x51:e51, x52:e52,
16814 x53:e53, x54:e54, x55:e55, x56:e56, x57:e57, x58:e58, x59:e59, x60:e60, x61:e61, x62:e62, x63:e63,
16815 );
16816 transmute(src:r)
16817 }
16818}
16819
16820/// Set packed 16-bit integers in dst with the supplied values.
16821///
16822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16823#[inline]
16824#[target_feature(enable = "avx512f")]
16825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16827pub const fn _mm512_set_epi16(
16828 e31: i16,
16829 e30: i16,
16830 e29: i16,
16831 e28: i16,
16832 e27: i16,
16833 e26: i16,
16834 e25: i16,
16835 e24: i16,
16836 e23: i16,
16837 e22: i16,
16838 e21: i16,
16839 e20: i16,
16840 e19: i16,
16841 e18: i16,
16842 e17: i16,
16843 e16: i16,
16844 e15: i16,
16845 e14: i16,
16846 e13: i16,
16847 e12: i16,
16848 e11: i16,
16849 e10: i16,
16850 e9: i16,
16851 e8: i16,
16852 e7: i16,
16853 e6: i16,
16854 e5: i16,
16855 e4: i16,
16856 e3: i16,
16857 e2: i16,
16858 e1: i16,
16859 e0: i16,
16860) -> __m512i {
16861 unsafe {
16862 let r: Simd = i16x32::new(
16863 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15, x16:e16, x17:e17, x18:e18,
16864 x19:e19, x20:e20, x21:e21, x22:e22, x23:e23, x24:e24, x25:e25, x26:e26, x27:e27, x28:e28, x29:e29, x30:e30, x31:e31,
16865 );
16866 transmute(src:r)
16867 }
16868}
16869
16870/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16871///
16872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16873#[inline]
16874#[target_feature(enable = "avx512f")]
16875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16877pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16878 _mm512_set_epi32(e15:d, e14:c, e13:b, e12:a, e11:d, e10:c, e9:b, e8:a, e7:d, e6:c, e5:b, e4:a, e3:d, e2:c, e1:b, e0:a)
16879}
16880
16881/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16882///
16883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16884#[inline]
16885#[target_feature(enable = "avx512f")]
16886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16888pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16889 _mm512_set_ps(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a, e8:d, e9:c, e10:b, e11:a, e12:d, e13:c, e14:b, e15:a)
16890}
16891
16892/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16893///
16894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16895#[inline]
16896#[target_feature(enable = "avx512f")]
16897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16899pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16900 _mm512_set_pd(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a)
16901}
16902
16903/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16904///
16905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16906#[inline]
16907#[target_feature(enable = "avx512f")]
16908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16910pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16911 _mm512_set_epi32(e15:a, e14:b, e13:c, e12:d, e11:a, e10:b, e9:c, e8:d, e7:a, e6:b, e5:c, e4:d, e3:a, e2:b, e1:c, e0:d)
16912}
16913
16914/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16915///
16916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16917#[inline]
16918#[target_feature(enable = "avx512f")]
16919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16921pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16922 _mm512_set_ps(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d, e8:a, e9:b, e10:c, e11:d, e12:a, e13:b, e14:c, e15:d)
16923}
16924
16925/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16926///
16927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16928#[inline]
16929#[target_feature(enable = "avx512f")]
16930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16931#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16932pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16933 _mm512_set_pd(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d)
16934}
16935
16936/// Set packed 64-bit integers in dst with the supplied values.
16937///
16938/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16939#[inline]
16940#[target_feature(enable = "avx512f")]
16941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16943pub const fn _mm512_set_epi64(
16944 e0: i64,
16945 e1: i64,
16946 e2: i64,
16947 e3: i64,
16948 e4: i64,
16949 e5: i64,
16950 e6: i64,
16951 e7: i64,
16952) -> __m512i {
16953 _mm512_setr_epi64(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
16954}
16955
16956/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16957///
16958/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16959#[inline]
16960#[target_feature(enable = "avx512f")]
16961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16962#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
16963pub const fn _mm512_setr_epi64(
16964 e0: i64,
16965 e1: i64,
16966 e2: i64,
16967 e3: i64,
16968 e4: i64,
16969 e5: i64,
16970 e6: i64,
16971 e7: i64,
16972) -> __m512i {
16973 unsafe {
16974 let r: Simd = i64x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7);
16975 transmute(src:r)
16976 }
16977}
16978
16979/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16980///
16981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16982#[inline]
16983#[target_feature(enable = "avx512f")]
16984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16985#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16986#[rustc_legacy_const_generics(2)]
16987pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16988 offsets: __m256i,
16989 slice: *const f64,
16990) -> __m512d {
16991 static_assert_imm8_scale!(SCALE);
16992 let zero: Simd = f64x8::ZERO;
16993 let neg_one: i8 = -1;
16994 let slice: *const i8 = slice as *const i8;
16995 let offsets: Simd = offsets.as_i32x8();
16996 let r: Simd = vgatherdpd(src:zero, slice, offsets, mask:neg_one, SCALE);
16997 transmute(src:r)
16998}
16999
17000/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17001///
17002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
17003#[inline]
17004#[target_feature(enable = "avx512f")]
17005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17006#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17007#[rustc_legacy_const_generics(4)]
17008pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
17009 src: __m512d,
17010 mask: __mmask8,
17011 offsets: __m256i,
17012 slice: *const f64,
17013) -> __m512d {
17014 static_assert_imm8_scale!(SCALE);
17015 let src: Simd = src.as_f64x8();
17016 let slice: *const i8 = slice as *const i8;
17017 let offsets: Simd = offsets.as_i32x8();
17018 let r: Simd = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
17019 transmute(src:r)
17020}
17021
17022/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17023///
17024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
17025#[inline]
17026#[target_feature(enable = "avx512f")]
17027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17028#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17029#[rustc_legacy_const_generics(2)]
17030pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
17031 offsets: __m512i,
17032 slice: *const f64,
17033) -> __m512d {
17034 static_assert_imm8_scale!(SCALE);
17035 let zero: Simd = f64x8::ZERO;
17036 let neg_one: i8 = -1;
17037 let slice: *const i8 = slice as *const i8;
17038 let offsets: Simd = offsets.as_i64x8();
17039 let r: Simd = vgatherqpd(src:zero, slice, offsets, mask:neg_one, SCALE);
17040 transmute(src:r)
17041}
17042
17043/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17044///
17045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
17046#[inline]
17047#[target_feature(enable = "avx512f")]
17048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17049#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17050#[rustc_legacy_const_generics(4)]
17051pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
17052 src: __m512d,
17053 mask: __mmask8,
17054 offsets: __m512i,
17055 slice: *const f64,
17056) -> __m512d {
17057 static_assert_imm8_scale!(SCALE);
17058 let src: Simd = src.as_f64x8();
17059 let slice: *const i8 = slice as *const i8;
17060 let offsets: Simd = offsets.as_i64x8();
17061 let r: Simd = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
17062 transmute(src:r)
17063}
17064
17065/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17066///
17067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
17068#[inline]
17069#[target_feature(enable = "avx512f")]
17070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17071#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17072#[rustc_legacy_const_generics(2)]
17073pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
17074 static_assert_imm8_scale!(SCALE);
17075 let zero: Simd = f32x8::ZERO;
17076 let neg_one: i8 = -1;
17077 let slice: *const i8 = slice as *const i8;
17078 let offsets: Simd = offsets.as_i64x8();
17079 let r: Simd = vgatherqps(src:zero, slice, offsets, mask:neg_one, SCALE);
17080 transmute(src:r)
17081}
17082
17083/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17084///
17085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
17086#[inline]
17087#[target_feature(enable = "avx512f")]
17088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17089#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17090#[rustc_legacy_const_generics(4)]
17091pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
17092 src: __m256,
17093 mask: __mmask8,
17094 offsets: __m512i,
17095 slice: *const f32,
17096) -> __m256 {
17097 static_assert_imm8_scale!(SCALE);
17098 let src: Simd = src.as_f32x8();
17099 let slice: *const i8 = slice as *const i8;
17100 let offsets: Simd = offsets.as_i64x8();
17101 let r: Simd = vgatherqps(src, slice, offsets, mask as i8, SCALE);
17102 transmute(src:r)
17103}
17104
17105/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17106///
17107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
17108#[inline]
17109#[target_feature(enable = "avx512f")]
17110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17111#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17112#[rustc_legacy_const_generics(2)]
17113pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
17114 static_assert_imm8_scale!(SCALE);
17115 let zero: Simd = f32x16::ZERO;
17116 let neg_one: i16 = -1;
17117 let slice: *const i8 = slice as *const i8;
17118 let offsets: Simd = offsets.as_i32x16();
17119 let r: Simd = vgatherdps(src:zero, slice, offsets, mask:neg_one, SCALE);
17120 transmute(src:r)
17121}
17122
17123/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17129#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17130#[rustc_legacy_const_generics(4)]
17131pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
17132 src: __m512,
17133 mask: __mmask16,
17134 offsets: __m512i,
17135 slice: *const f32,
17136) -> __m512 {
17137 static_assert_imm8_scale!(SCALE);
17138 let src: Simd = src.as_f32x16();
17139 let slice: *const i8 = slice as *const i8;
17140 let offsets: Simd = offsets.as_i32x16();
17141 let r: Simd = vgatherdps(src, slice, offsets, mask as i16, SCALE);
17142 transmute(src:r)
17143}
17144
17145/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17146///
17147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
17148#[inline]
17149#[target_feature(enable = "avx512f")]
17150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17151#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17152#[rustc_legacy_const_generics(2)]
17153pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
17154 offsets: __m512i,
17155 slice: *const i32,
17156) -> __m512i {
17157 static_assert_imm8_scale!(SCALE);
17158 let zero: Simd = i32x16::ZERO;
17159 let neg_one: i16 = -1;
17160 let slice: *const i8 = slice as *const i8;
17161 let offsets: Simd = offsets.as_i32x16();
17162 let r: Simd = vpgatherdd(src:zero, slice, offsets, mask:neg_one, SCALE);
17163 transmute(src:r)
17164}
17165
17166/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17172#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(4)]
17174pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
17175 src: __m512i,
17176 mask: __mmask16,
17177 offsets: __m512i,
17178 slice: *const i32,
17179) -> __m512i {
17180 static_assert_imm8_scale!(SCALE);
17181 let src: Simd = src.as_i32x16();
17182 let mask: i16 = mask as i16;
17183 let slice: *const i8 = slice as *const i8;
17184 let offsets: Simd = offsets.as_i32x16();
17185 let r: Simd = vpgatherdd(src, slice, offsets, mask, SCALE);
17186 transmute(src:r)
17187}
17188
17189/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17190///
17191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
17192#[inline]
17193#[target_feature(enable = "avx512f")]
17194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17195#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17196#[rustc_legacy_const_generics(2)]
17197pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
17198 offsets: __m256i,
17199 slice: *const i64,
17200) -> __m512i {
17201 static_assert_imm8_scale!(SCALE);
17202 let zero: Simd = i64x8::ZERO;
17203 let neg_one: i8 = -1;
17204 let slice: *const i8 = slice as *const i8;
17205 let offsets: Simd = offsets.as_i32x8();
17206 let r: Simd = vpgatherdq(src:zero, slice, offsets, mask:neg_one, SCALE);
17207 transmute(src:r)
17208}
17209
17210/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17211///
17212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
17213#[inline]
17214#[target_feature(enable = "avx512f")]
17215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17216#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17217#[rustc_legacy_const_generics(4)]
17218pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
17219 src: __m512i,
17220 mask: __mmask8,
17221 offsets: __m256i,
17222 slice: *const i64,
17223) -> __m512i {
17224 static_assert_imm8_scale!(SCALE);
17225 let src: Simd = src.as_i64x8();
17226 let mask: i8 = mask as i8;
17227 let slice: *const i8 = slice as *const i8;
17228 let offsets: Simd = offsets.as_i32x8();
17229 let r: Simd = vpgatherdq(src, slice, offsets, mask, SCALE);
17230 transmute(src:r)
17231}
17232
17233/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17234///
17235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
17236#[inline]
17237#[target_feature(enable = "avx512f")]
17238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17239#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17240#[rustc_legacy_const_generics(2)]
17241pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
17242 offsets: __m512i,
17243 slice: *const i64,
17244) -> __m512i {
17245 static_assert_imm8_scale!(SCALE);
17246 let zero: Simd = i64x8::ZERO;
17247 let neg_one: i8 = -1;
17248 let slice: *const i8 = slice as *const i8;
17249 let offsets: Simd = offsets.as_i64x8();
17250 let r: Simd = vpgatherqq(src:zero, slice, offsets, mask:neg_one, SCALE);
17251 transmute(src:r)
17252}
17253
17254/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17255///
17256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
17257#[inline]
17258#[target_feature(enable = "avx512f")]
17259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17260#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17261#[rustc_legacy_const_generics(4)]
17262pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
17263 src: __m512i,
17264 mask: __mmask8,
17265 offsets: __m512i,
17266 slice: *const i64,
17267) -> __m512i {
17268 static_assert_imm8_scale!(SCALE);
17269 let src: Simd = src.as_i64x8();
17270 let mask: i8 = mask as i8;
17271 let slice: *const i8 = slice as *const i8;
17272 let offsets: Simd = offsets.as_i64x8();
17273 let r: Simd = vpgatherqq(src, slice, offsets, mask, SCALE);
17274 transmute(src:r)
17275}
17276
17277/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
17278///
17279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
17280#[inline]
17281#[target_feature(enable = "avx512f")]
17282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17283#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17284#[rustc_legacy_const_generics(2)]
17285pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
17286 offsets: __m512i,
17287 slice: *const i32,
17288) -> __m256i {
17289 static_assert_imm8_scale!(SCALE);
17290 let zeros: Simd = i32x8::ZERO;
17291 let neg_one: i8 = -1;
17292 let slice: *const i8 = slice as *const i8;
17293 let offsets: Simd = offsets.as_i64x8();
17294 let r: Simd = vpgatherqd(src:zeros, slice, offsets, mask:neg_one, SCALE);
17295 transmute(src:r)
17296}
17297
17298/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17299///
17300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
17301#[inline]
17302#[target_feature(enable = "avx512f")]
17303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17304#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17305#[rustc_legacy_const_generics(4)]
17306pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
17307 src: __m256i,
17308 mask: __mmask8,
17309 offsets: __m512i,
17310 slice: *const i32,
17311) -> __m256i {
17312 static_assert_imm8_scale!(SCALE);
17313 let src: Simd = src.as_i32x8();
17314 let mask: i8 = mask as i8;
17315 let slice: *const i8 = slice as *const i8;
17316 let offsets: Simd = offsets.as_i64x8();
17317 let r: Simd = vpgatherqd(src, slice, offsets, mask, SCALE);
17318 transmute(src:r)
17319}
17320
17321/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17322///
17323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
17324#[inline]
17325#[target_feature(enable = "avx512f")]
17326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17327#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17328#[rustc_legacy_const_generics(3)]
17329pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
17330 slice: *mut f64,
17331 offsets: __m256i,
17332 src: __m512d,
17333) {
17334 static_assert_imm8_scale!(SCALE);
17335 let src: Simd = src.as_f64x8();
17336 let neg_one: i8 = -1;
17337 let slice: *mut i8 = slice as *mut i8;
17338 let offsets: Simd = offsets.as_i32x8();
17339 vscatterdpd(slice, mask:neg_one, offsets, src, SCALE);
17340}
17341
17342/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17343///
17344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
17345#[inline]
17346#[target_feature(enable = "avx512f")]
17347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17348#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17349#[rustc_legacy_const_generics(4)]
17350pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
17351 slice: *mut f64,
17352 mask: __mmask8,
17353 offsets: __m256i,
17354 src: __m512d,
17355) {
17356 static_assert_imm8_scale!(SCALE);
17357 let src: Simd = src.as_f64x8();
17358 let slice: *mut i8 = slice as *mut i8;
17359 let offsets: Simd = offsets.as_i32x8();
17360 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
17361}
17362
17363/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17364///
17365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
17366#[inline]
17367#[target_feature(enable = "avx512f")]
17368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17369#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17370#[rustc_legacy_const_generics(3)]
17371pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
17372 slice: *mut f64,
17373 offsets: __m512i,
17374 src: __m512d,
17375) {
17376 static_assert_imm8_scale!(SCALE);
17377 let src: Simd = src.as_f64x8();
17378 let neg_one: i8 = -1;
17379 let slice: *mut i8 = slice as *mut i8;
17380 let offsets: Simd = offsets.as_i64x8();
17381 vscatterqpd(slice, mask:neg_one, offsets, src, SCALE);
17382}
17383
17384/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17385///
17386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
17387#[inline]
17388#[target_feature(enable = "avx512f")]
17389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17390#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17391#[rustc_legacy_const_generics(4)]
17392pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
17393 slice: *mut f64,
17394 mask: __mmask8,
17395 offsets: __m512i,
17396 src: __m512d,
17397) {
17398 static_assert_imm8_scale!(SCALE);
17399 let src: Simd = src.as_f64x8();
17400 let slice: *mut i8 = slice as *mut i8;
17401 let offsets: Simd = offsets.as_i64x8();
17402 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
17403}
17404
17405/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17406///
17407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
17408#[inline]
17409#[target_feature(enable = "avx512f")]
17410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17411#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17412#[rustc_legacy_const_generics(3)]
17413pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17414 slice: *mut f32,
17415 offsets: __m512i,
17416 src: __m512,
17417) {
17418 static_assert_imm8_scale!(SCALE);
17419 let src: Simd = src.as_f32x16();
17420 let neg_one: i16 = -1;
17421 let slice: *mut i8 = slice as *mut i8;
17422 let offsets: Simd = offsets.as_i32x16();
17423 vscatterdps(slice, mask:neg_one, offsets, src, SCALE);
17424}
17425
17426/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17427///
17428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17429#[inline]
17430#[target_feature(enable = "avx512f")]
17431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17432#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17433#[rustc_legacy_const_generics(4)]
17434pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17435 slice: *mut f32,
17436 mask: __mmask16,
17437 offsets: __m512i,
17438 src: __m512,
17439) {
17440 static_assert_imm8_scale!(SCALE);
17441 let src: Simd = src.as_f32x16();
17442 let slice: *mut i8 = slice as *mut i8;
17443 let offsets: Simd = offsets.as_i32x16();
17444 vscatterdps(slice, mask as i16, offsets, src, SCALE);
17445}
17446
17447/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17448///
17449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
17450#[inline]
17451#[target_feature(enable = "avx512f")]
17452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17453#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17454#[rustc_legacy_const_generics(3)]
17455pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17456 slice: *mut f32,
17457 offsets: __m512i,
17458 src: __m256,
17459) {
17460 static_assert_imm8_scale!(SCALE);
17461 let src: Simd = src.as_f32x8();
17462 let neg_one: i8 = -1;
17463 let slice: *mut i8 = slice as *mut i8;
17464 let offsets: Simd = offsets.as_i64x8();
17465 vscatterqps(slice, mask:neg_one, offsets, src, SCALE);
17466}
17467
17468/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17469///
17470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17471#[inline]
17472#[target_feature(enable = "avx512f")]
17473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17474#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17475#[rustc_legacy_const_generics(4)]
17476pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17477 slice: *mut f32,
17478 mask: __mmask8,
17479 offsets: __m512i,
17480 src: __m256,
17481) {
17482 static_assert_imm8_scale!(SCALE);
17483 let src: Simd = src.as_f32x8();
17484 let slice: *mut i8 = slice as *mut i8;
17485 let offsets: Simd = offsets.as_i64x8();
17486 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17487}
17488
17489/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17490///
17491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17492#[inline]
17493#[target_feature(enable = "avx512f")]
17494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17495#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17496#[rustc_legacy_const_generics(3)]
17497pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17498 slice: *mut i64,
17499 offsets: __m256i,
17500 src: __m512i,
17501) {
17502 static_assert_imm8_scale!(SCALE);
17503 let src: Simd = src.as_i64x8();
17504 let neg_one: i8 = -1;
17505 let slice: *mut i8 = slice as *mut i8;
17506 let offsets: Simd = offsets.as_i32x8();
17507 vpscatterdq(slice, mask:neg_one, offsets, src, SCALE);
17508}
17509
17510/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17511///
17512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17513#[inline]
17514#[target_feature(enable = "avx512f")]
17515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17516#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17517#[rustc_legacy_const_generics(4)]
17518pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17519 slice: *mut i64,
17520 mask: __mmask8,
17521 offsets: __m256i,
17522 src: __m512i,
17523) {
17524 static_assert_imm8_scale!(SCALE);
17525 let src: Simd = src.as_i64x8();
17526 let mask: i8 = mask as i8;
17527 let slice: *mut i8 = slice as *mut i8;
17528 let offsets: Simd = offsets.as_i32x8();
17529 vpscatterdq(slice, mask, offsets, src, SCALE);
17530}
17531
17532/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17533///
17534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17535#[inline]
17536#[target_feature(enable = "avx512f")]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17539#[rustc_legacy_const_generics(3)]
17540pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17541 slice: *mut i64,
17542 offsets: __m512i,
17543 src: __m512i,
17544) {
17545 static_assert_imm8_scale!(SCALE);
17546 let src: Simd = src.as_i64x8();
17547 let neg_one: i8 = -1;
17548 let slice: *mut i8 = slice as *mut i8;
17549 let offsets: Simd = offsets.as_i64x8();
17550 vpscatterqq(slice, mask:neg_one, offsets, src, SCALE);
17551}
17552
17553/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17554///
17555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17556#[inline]
17557#[target_feature(enable = "avx512f")]
17558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17559#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17560#[rustc_legacy_const_generics(4)]
17561pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17562 slice: *mut i64,
17563 mask: __mmask8,
17564 offsets: __m512i,
17565 src: __m512i,
17566) {
17567 static_assert_imm8_scale!(SCALE);
17568 let src: Simd = src.as_i64x8();
17569 let mask: i8 = mask as i8;
17570 let slice: *mut i8 = slice as *mut i8;
17571 let offsets: Simd = offsets.as_i64x8();
17572 vpscatterqq(slice, mask, offsets, src, SCALE);
17573}
17574
17575/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17576///
17577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17578#[inline]
17579#[target_feature(enable = "avx512f")]
17580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17581#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17582#[rustc_legacy_const_generics(3)]
17583pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17584 slice: *mut i32,
17585 offsets: __m512i,
17586 src: __m512i,
17587) {
17588 static_assert_imm8_scale!(SCALE);
17589 let src: Simd = src.as_i32x16();
17590 let neg_one: i16 = -1;
17591 let slice: *mut i8 = slice as *mut i8;
17592 let offsets: Simd = offsets.as_i32x16();
17593 vpscatterdd(slice, mask:neg_one, offsets, src, SCALE);
17594}
17595
17596/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17597///
17598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17599#[inline]
17600#[target_feature(enable = "avx512f")]
17601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17602#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17603#[rustc_legacy_const_generics(4)]
17604pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17605 slice: *mut i32,
17606 mask: __mmask16,
17607 offsets: __m512i,
17608 src: __m512i,
17609) {
17610 static_assert_imm8_scale!(SCALE);
17611 let src: Simd = src.as_i32x16();
17612 let mask: i16 = mask as i16;
17613 let slice: *mut i8 = slice as *mut i8;
17614 let offsets: Simd = offsets.as_i32x16();
17615 vpscatterdd(slice, mask, offsets, src, SCALE);
17616}
17617
17618/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17619///
17620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17621#[inline]
17622#[target_feature(enable = "avx512f")]
17623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17624#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17625#[rustc_legacy_const_generics(3)]
17626pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17627 slice: *mut i32,
17628 offsets: __m512i,
17629 src: __m256i,
17630) {
17631 static_assert_imm8_scale!(SCALE);
17632 let src: Simd = src.as_i32x8();
17633 let neg_one: i8 = -1;
17634 let slice: *mut i8 = slice as *mut i8;
17635 let offsets: Simd = offsets.as_i64x8();
17636 vpscatterqd(slice, mask:neg_one, offsets, src, SCALE);
17637}
17638
17639/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17640///
17641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17642#[inline]
17643#[target_feature(enable = "avx512f")]
17644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17645#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17646#[rustc_legacy_const_generics(4)]
17647pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17648 slice: *mut i32,
17649 mask: __mmask8,
17650 offsets: __m512i,
17651 src: __m256i,
17652) {
17653 static_assert_imm8_scale!(SCALE);
17654 let src: Simd = src.as_i32x8();
17655 let mask: i8 = mask as i8;
17656 let slice: *mut i8 = slice as *mut i8;
17657 let offsets: Simd = offsets.as_i64x8();
17658 vpscatterqd(slice, mask, offsets, src, SCALE);
17659}
17660
17661/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17662/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17663///
17664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
17665#[inline]
17666#[target_feature(enable = "avx512f")]
17667#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17668#[rustc_legacy_const_generics(2)]
17669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17670pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17671 vindex: __m512i,
17672 base_addr: *const i64,
17673) -> __m512i {
17674 _mm512_i32gather_epi64::<SCALE>(offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17675}
17676
17677/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17678/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17679/// (elements are copied from src when the corresponding mask bit is not set).
17680///
17681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17682#[inline]
17683#[target_feature(enable = "avx512f")]
17684#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17685#[rustc_legacy_const_generics(4)]
17686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17687pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17688 src: __m512i,
17689 k: __mmask8,
17690 vindex: __m512i,
17691 base_addr: *const i64,
17692) -> __m512i {
17693 _mm512_mask_i32gather_epi64::<SCALE>(src, mask:k, offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17694}
17695
17696/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17697/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17698///
17699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17700#[inline]
17701#[target_feature(enable = "avx512f")]
17702#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17703#[rustc_legacy_const_generics(2)]
17704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17705pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17706 vindex: __m512i,
17707 base_addr: *const f64,
17708) -> __m512d {
17709 _mm512_i32gather_pd::<SCALE>(offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17710}
17711
17712/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17713/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17714/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17715///
17716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17717#[inline]
17718#[target_feature(enable = "avx512f")]
17719#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17720#[rustc_legacy_const_generics(4)]
17721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17722pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17723 src: __m512d,
17724 k: __mmask8,
17725 vindex: __m512i,
17726 base_addr: *const f64,
17727) -> __m512d {
17728 _mm512_mask_i32gather_pd::<SCALE>(src, mask:k, offsets:_mm512_castsi512_si256(vindex), slice:base_addr)
17729}
17730
17731/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17732/// indices stored in the lower half of vindex scaled by scale.
17733///
17734/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17735#[inline]
17736#[target_feature(enable = "avx512f")]
17737#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17738#[rustc_legacy_const_generics(3)]
17739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17740pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17741 base_addr: *mut i64,
17742 vindex: __m512i,
17743 a: __m512i,
17744) {
17745 _mm512_i32scatter_epi64::<SCALE>(slice:base_addr, offsets:_mm512_castsi512_si256(vindex), src:a)
17746}
17747
17748/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17749/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17750/// mask bit is not set are not written to memory).
17751///
17752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17753#[inline]
17754#[target_feature(enable = "avx512f")]
17755#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17756#[rustc_legacy_const_generics(4)]
17757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17758pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17759 base_addr: *mut i64,
17760 k: __mmask8,
17761 vindex: __m512i,
17762 a: __m512i,
17763) {
17764 _mm512_mask_i32scatter_epi64::<SCALE>(slice:base_addr, mask:k, offsets:_mm512_castsi512_si256(vindex), src:a)
17765}
17766
17767/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17768/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17769///
17770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17771#[inline]
17772#[target_feature(enable = "avx512f")]
17773#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17774#[rustc_legacy_const_generics(3)]
17775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17776pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17777 base_addr: *mut f64,
17778 vindex: __m512i,
17779 a: __m512d,
17780) {
17781 _mm512_i32scatter_pd::<SCALE>(slice:base_addr, offsets:_mm512_castsi512_si256(vindex), src:a)
17782}
17783
17784/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17785/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17786/// (elements whose corresponding mask bit is not set are not written to memory).
17787///
17788/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17789#[inline]
17790#[target_feature(enable = "avx512f")]
17791#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17792#[rustc_legacy_const_generics(4)]
17793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17794pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17795 base_addr: *mut f64,
17796 k: __mmask8,
17797 vindex: __m512i,
17798 a: __m512d,
17799) {
17800 _mm512_mask_i32scatter_pd::<SCALE>(slice:base_addr, mask:k, offsets:_mm512_castsi512_si256(vindex), src:a)
17801}
17802
17803/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17804/// indices stored in vindex scaled by scale
17805///
17806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17807#[inline]
17808#[target_feature(enable = "avx512f,avx512vl")]
17809#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17810#[rustc_legacy_const_generics(3)]
17811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17812pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17813 base_addr: *mut i32,
17814 vindex: __m256i,
17815 a: __m256i,
17816) {
17817 static_assert_imm8_scale!(SCALE);
17818 vpscatterdd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x8(), src:a.as_i32x8(), SCALE)
17819}
17820
17821/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17822/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17823/// are not written to memory).
17824///
17825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17826#[inline]
17827#[target_feature(enable = "avx512f,avx512vl")]
17828#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17829#[rustc_legacy_const_generics(4)]
17830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17831pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17832 base_addr: *mut i32,
17833 k: __mmask8,
17834 vindex: __m256i,
17835 a: __m256i,
17836) {
17837 static_assert_imm8_scale!(SCALE);
17838 vpscatterdd_256(slice:base_addr as _, k, offsets:vindex.as_i32x8(), src:a.as_i32x8(), SCALE)
17839}
17840
17841/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17842///
17843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17844#[inline]
17845#[target_feature(enable = "avx512f,avx512vl")]
17846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17847#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17848#[rustc_legacy_const_generics(3)]
17849pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17850 slice: *mut i64,
17851 offsets: __m128i,
17852 src: __m256i,
17853) {
17854 static_assert_imm8_scale!(SCALE);
17855 let src: Simd = src.as_i64x4();
17856 let slice: *mut i8 = slice as *mut i8;
17857 let offsets: Simd = offsets.as_i32x4();
17858 vpscatterdq_256(slice, k:0xff, offsets, src, SCALE);
17859}
17860
17861/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17862/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17863/// are not written to memory).
17864///
17865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17866#[inline]
17867#[target_feature(enable = "avx512f,avx512vl")]
17868#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17869#[rustc_legacy_const_generics(4)]
17870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17871pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17872 base_addr: *mut i64,
17873 k: __mmask8,
17874 vindex: __m128i,
17875 a: __m256i,
17876) {
17877 static_assert_imm8_scale!(SCALE);
17878 vpscatterdq_256(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i64x4(), SCALE)
17879}
17880
17881/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17882/// at packed 32-bit integer indices stored in vindex scaled by scale
17883///
17884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17885#[inline]
17886#[target_feature(enable = "avx512f,avx512vl")]
17887#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17888#[rustc_legacy_const_generics(3)]
17889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17890pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17891 base_addr: *mut f64,
17892 vindex: __m128i,
17893 a: __m256d,
17894) {
17895 static_assert_imm8_scale!(SCALE);
17896 vscatterdpd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f64x4(), SCALE)
17897}
17898
17899/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17900/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17901/// mask bit is not set are not written to memory).
17902///
17903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17904#[inline]
17905#[target_feature(enable = "avx512f,avx512vl")]
17906#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17907#[rustc_legacy_const_generics(4)]
17908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17909pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17910 base_addr: *mut f64,
17911 k: __mmask8,
17912 vindex: __m128i,
17913 a: __m256d,
17914) {
17915 static_assert_imm8_scale!(SCALE);
17916 vscatterdpd_256(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f64x4(), SCALE)
17917}
17918
17919/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17920/// at packed 32-bit integer indices stored in vindex scaled by scale
17921///
17922/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17923#[inline]
17924#[target_feature(enable = "avx512f,avx512vl")]
17925#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17926#[rustc_legacy_const_generics(3)]
17927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17928pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17929 base_addr: *mut f32,
17930 vindex: __m256i,
17931 a: __m256,
17932) {
17933 static_assert_imm8_scale!(SCALE);
17934 vscatterdps_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x8(), src:a.as_f32x8(), SCALE)
17935}
17936
17937/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17938/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17939/// mask bit is not set are not written to memory).
17940///
17941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17942#[inline]
17943#[target_feature(enable = "avx512f,avx512vl")]
17944#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17945#[rustc_legacy_const_generics(4)]
17946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17947pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17948 base_addr: *mut f32,
17949 k: __mmask8,
17950 vindex: __m256i,
17951 a: __m256,
17952) {
17953 static_assert_imm8_scale!(SCALE);
17954 vscatterdps_256(slice:base_addr as _, k, offsets:vindex.as_i32x8(), src:a.as_f32x8(), SCALE)
17955}
17956
17957/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17958/// indices stored in vindex scaled by scale
17959///
17960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17961#[inline]
17962#[target_feature(enable = "avx512f,avx512vl")]
17963#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17964#[rustc_legacy_const_generics(3)]
17965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17966pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17967 base_addr: *mut i32,
17968 vindex: __m256i,
17969 a: __m128i,
17970) {
17971 static_assert_imm8_scale!(SCALE);
17972 vpscatterqd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_i32x4(), SCALE)
17973}
17974
17975/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17976/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17977/// are not written to memory).
17978///
17979/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17980#[inline]
17981#[target_feature(enable = "avx512f,avx512vl")]
17982#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17983#[rustc_legacy_const_generics(4)]
17984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17985pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17986 base_addr: *mut i32,
17987 k: __mmask8,
17988 vindex: __m256i,
17989 a: __m128i,
17990) {
17991 static_assert_imm8_scale!(SCALE);
17992 vpscatterqd_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_i32x4(), SCALE)
17993}
17994
17995/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17996/// indices stored in vindex scaled by scale
17997///
17998/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17999#[inline]
18000#[target_feature(enable = "avx512f,avx512vl")]
18001#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18002#[rustc_legacy_const_generics(3)]
18003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18004pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
18005 base_addr: *mut i64,
18006 vindex: __m256i,
18007 a: __m256i,
18008) {
18009 static_assert_imm8_scale!(SCALE);
18010 vpscatterqq_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_i64x4(), SCALE)
18011}
18012
18013/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18014/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18015/// are not written to memory).
18016///
18017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
18018#[inline]
18019#[target_feature(enable = "avx512f,avx512vl")]
18020#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18021#[rustc_legacy_const_generics(4)]
18022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18023pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
18024 base_addr: *mut i64,
18025 k: __mmask8,
18026 vindex: __m256i,
18027 a: __m256i,
18028) {
18029 static_assert_imm8_scale!(SCALE);
18030 vpscatterqq_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_i64x4(), SCALE)
18031}
18032
18033/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18034/// at packed 64-bit integer indices stored in vindex scaled by scale
18035///
18036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
18037#[inline]
18038#[target_feature(enable = "avx512f,avx512vl")]
18039#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18040#[rustc_legacy_const_generics(3)]
18041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18042pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
18043 base_addr: *mut f64,
18044 vindex: __m256i,
18045 a: __m256d,
18046) {
18047 static_assert_imm8_scale!(SCALE);
18048 vscatterqpd_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_f64x4(), SCALE)
18049}
18050
18051/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18052/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18053/// mask bit is not set are not written to memory).
18054///
18055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
18056#[inline]
18057#[target_feature(enable = "avx512f,avx512vl")]
18058#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18059#[rustc_legacy_const_generics(4)]
18060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18061pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
18062 base_addr: *mut f64,
18063 k: __mmask8,
18064 vindex: __m256i,
18065 a: __m256d,
18066) {
18067 static_assert_imm8_scale!(SCALE);
18068 vscatterqpd_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_f64x4(), SCALE)
18069}
18070
18071/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18072/// at packed 64-bit integer indices stored in vindex scaled by scale
18073///
18074/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
18075#[inline]
18076#[target_feature(enable = "avx512f,avx512vl")]
18077#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18078#[rustc_legacy_const_generics(3)]
18079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18080pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
18081 base_addr: *mut f32,
18082 vindex: __m256i,
18083 a: __m128,
18084) {
18085 static_assert_imm8_scale!(SCALE);
18086 vscatterqps_256(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x4(), src:a.as_f32x4(), SCALE)
18087}
18088
18089/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18090/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18091/// mask bit is not set are not written to memory).
18092///
18093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
18094#[inline]
18095#[target_feature(enable = "avx512f,avx512vl")]
18096#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18097#[rustc_legacy_const_generics(4)]
18098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18099pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
18100 base_addr: *mut f32,
18101 k: __mmask8,
18102 vindex: __m256i,
18103 a: __m128,
18104) {
18105 static_assert_imm8_scale!(SCALE);
18106 vscatterqps_256(slice:base_addr as _, k, offsets:vindex.as_i64x4(), src:a.as_f32x4(), SCALE)
18107}
18108
18109/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18110/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18111/// mask bit is not set).
18112///
18113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
18114#[inline]
18115#[target_feature(enable = "avx512f,avx512vl")]
18116#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18117#[rustc_legacy_const_generics(4)]
18118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18119pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
18120 src: __m256i,
18121 k: __mmask8,
18122 vindex: __m256i,
18123 base_addr: *const i32,
18124) -> __m256i {
18125 static_assert_imm8_scale!(SCALE);
18126 transmute(src:vpgatherdd_256(
18127 src.as_i32x8(),
18128 slice:base_addr as _,
18129 offsets:vindex.as_i32x8(),
18130 k,
18131 SCALE,
18132 ))
18133}
18134
18135/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18136/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18137/// mask bit is not set).
18138///
18139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
18140#[inline]
18141#[target_feature(enable = "avx512f,avx512vl")]
18142#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18143#[rustc_legacy_const_generics(4)]
18144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18145pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
18146 src: __m256i,
18147 k: __mmask8,
18148 vindex: __m128i,
18149 base_addr: *const i64,
18150) -> __m256i {
18151 static_assert_imm8_scale!(SCALE);
18152 transmute(src:vpgatherdq_256(
18153 src.as_i64x4(),
18154 slice:base_addr as _,
18155 offsets:vindex.as_i32x4(),
18156 k,
18157 SCALE,
18158 ))
18159}
18160
18161/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18162/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18163/// from src when the corresponding mask bit is not set).
18164///
18165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
18166#[inline]
18167#[target_feature(enable = "avx512f,avx512vl")]
18168#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18169#[rustc_legacy_const_generics(4)]
18170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18171pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
18172 src: __m256d,
18173 k: __mmask8,
18174 vindex: __m128i,
18175 base_addr: *const f64,
18176) -> __m256d {
18177 static_assert_imm8_scale!(SCALE);
18178 transmute(src:vgatherdpd_256(
18179 src.as_f64x4(),
18180 slice:base_addr as _,
18181 offsets:vindex.as_i32x4(),
18182 k,
18183 SCALE,
18184 ))
18185}
18186
18187/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18188/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18189/// from src when the corresponding mask bit is not set).
18190///
18191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
18192#[inline]
18193#[target_feature(enable = "avx512f,avx512vl")]
18194#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18195#[rustc_legacy_const_generics(4)]
18196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18197pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
18198 src: __m256,
18199 k: __mmask8,
18200 vindex: __m256i,
18201 base_addr: *const f32,
18202) -> __m256 {
18203 static_assert_imm8_scale!(SCALE);
18204 transmute(src:vgatherdps_256(
18205 src.as_f32x8(),
18206 slice:base_addr as _,
18207 offsets:vindex.as_i32x8(),
18208 k,
18209 SCALE,
18210 ))
18211}
18212
18213/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18214/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18215/// mask bit is not set).
18216///
18217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
18218#[inline]
18219#[target_feature(enable = "avx512f,avx512vl")]
18220#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18221#[rustc_legacy_const_generics(4)]
18222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18223pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
18224 src: __m128i,
18225 k: __mmask8,
18226 vindex: __m256i,
18227 base_addr: *const i32,
18228) -> __m128i {
18229 static_assert_imm8_scale!(SCALE);
18230 transmute(src:vpgatherqd_256(
18231 src.as_i32x4(),
18232 slice:base_addr as _,
18233 offsets:vindex.as_i64x4(),
18234 k,
18235 SCALE,
18236 ))
18237}
18238
18239/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18240/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18241/// mask bit is not set).
18242///
18243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
18244#[inline]
18245#[target_feature(enable = "avx512f,avx512vl")]
18246#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18247#[rustc_legacy_const_generics(4)]
18248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18249pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
18250 src: __m256i,
18251 k: __mmask8,
18252 vindex: __m256i,
18253 base_addr: *const i64,
18254) -> __m256i {
18255 static_assert_imm8_scale!(SCALE);
18256 transmute(src:vpgatherqq_256(
18257 src.as_i64x4(),
18258 slice:base_addr as _,
18259 offsets:vindex.as_i64x4(),
18260 k,
18261 SCALE,
18262 ))
18263}
18264
18265/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18266/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18267/// from src when the corresponding mask bit is not set).
18268///
18269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
18270#[inline]
18271#[target_feature(enable = "avx512f,avx512vl")]
18272#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18273#[rustc_legacy_const_generics(4)]
18274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18275pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
18276 src: __m256d,
18277 k: __mmask8,
18278 vindex: __m256i,
18279 base_addr: *const f64,
18280) -> __m256d {
18281 static_assert_imm8_scale!(SCALE);
18282 transmute(src:vgatherqpd_256(
18283 src.as_f64x4(),
18284 slice:base_addr as _,
18285 offsets:vindex.as_i64x4(),
18286 k,
18287 SCALE,
18288 ))
18289}
18290
18291/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18292/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18293/// from src when the corresponding mask bit is not set).
18294///
18295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
18296#[inline]
18297#[target_feature(enable = "avx512f,avx512vl")]
18298#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18299#[rustc_legacy_const_generics(4)]
18300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18301pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
18302 src: __m128,
18303 k: __mmask8,
18304 vindex: __m256i,
18305 base_addr: *const f32,
18306) -> __m128 {
18307 static_assert_imm8_scale!(SCALE);
18308 transmute(src:vgatherqps_256(
18309 src.as_f32x4(),
18310 slice:base_addr as _,
18311 offsets:vindex.as_i64x4(),
18312 k,
18313 SCALE,
18314 ))
18315}
18316
18317/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18318/// indices stored in vindex scaled by scale
18319///
18320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
18321#[inline]
18322#[target_feature(enable = "avx512f,avx512vl")]
18323#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18324#[rustc_legacy_const_generics(3)]
18325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18326pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
18327 base_addr: *mut i32,
18328 vindex: __m128i,
18329 a: __m128i,
18330) {
18331 static_assert_imm8_scale!(SCALE);
18332 vpscatterdd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_i32x4(), SCALE)
18333}
18334
18335/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18336/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18337/// are not written to memory).
18338///
18339/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
18340#[inline]
18341#[target_feature(enable = "avx512f,avx512vl")]
18342#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
18343#[rustc_legacy_const_generics(4)]
18344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18345pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
18346 base_addr: *mut i32,
18347 k: __mmask8,
18348 vindex: __m128i,
18349 a: __m128i,
18350) {
18351 static_assert_imm8_scale!(SCALE);
18352 vpscatterdd_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i32x4(), SCALE)
18353}
18354
18355/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18356/// indices stored in vindex scaled by scale
18357///
18358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
18359#[inline]
18360#[target_feature(enable = "avx512f,avx512vl")]
18361#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18362#[rustc_legacy_const_generics(3)]
18363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18364pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
18365 base_addr: *mut i64,
18366 vindex: __m128i,
18367 a: __m128i,
18368) {
18369 static_assert_imm8_scale!(SCALE);
18370 vpscatterdq_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_i64x2(), SCALE)
18371}
18372
18373/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
18374/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18375/// are not written to memory).
18376///
18377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
18378#[inline]
18379#[target_feature(enable = "avx512f,avx512vl")]
18380#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
18381#[rustc_legacy_const_generics(4)]
18382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18383pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
18384 base_addr: *mut i64,
18385 k: __mmask8,
18386 vindex: __m128i,
18387 a: __m128i,
18388) {
18389 static_assert_imm8_scale!(SCALE);
18390 vpscatterdq_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_i64x2(), SCALE)
18391}
18392
18393/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18394/// at packed 32-bit integer indices stored in vindex scaled by scale
18395///
18396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
18397#[inline]
18398#[target_feature(enable = "avx512f,avx512vl")]
18399#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18400#[rustc_legacy_const_generics(3)]
18401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18402pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
18403 base_addr: *mut f64,
18404 vindex: __m128i,
18405 a: __m128d,
18406) {
18407 static_assert_imm8_scale!(SCALE);
18408 vscatterdpd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f64x2(), SCALE)
18409}
18410
18411/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18412/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18413/// mask bit is not set are not written to memory).
18414///
18415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18416#[inline]
18417#[target_feature(enable = "avx512f,avx512vl")]
18418#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18419#[rustc_legacy_const_generics(4)]
18420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18421pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18422 base_addr: *mut f64,
18423 k: __mmask8,
18424 vindex: __m128i,
18425 a: __m128d,
18426) {
18427 static_assert_imm8_scale!(SCALE);
18428 vscatterdpd_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f64x2(), SCALE)
18429}
18430
18431/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18432/// at packed 32-bit integer indices stored in vindex scaled by scale
18433///
18434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18435#[inline]
18436#[target_feature(enable = "avx512f,avx512vl")]
18437#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18438#[rustc_legacy_const_generics(3)]
18439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18440pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18441 static_assert_imm8_scale!(SCALE);
18442 vscatterdps_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i32x4(), src:a.as_f32x4(), SCALE)
18443}
18444
18445/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18446/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18447/// mask bit is not set are not written to memory).
18448///
18449/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18453#[rustc_legacy_const_generics(4)]
18454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18455pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18456 base_addr: *mut f32,
18457 k: __mmask8,
18458 vindex: __m128i,
18459 a: __m128,
18460) {
18461 static_assert_imm8_scale!(SCALE);
18462 vscatterdps_128(slice:base_addr as _, k, offsets:vindex.as_i32x4(), src:a.as_f32x4(), SCALE)
18463}
18464
18465/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18466/// indices stored in vindex scaled by scale
18467///
18468/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18472#[rustc_legacy_const_generics(3)]
18473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18474pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18475 base_addr: *mut i32,
18476 vindex: __m128i,
18477 a: __m128i,
18478) {
18479 static_assert_imm8_scale!(SCALE);
18480 vpscatterqd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_i32x4(), SCALE)
18481}
18482
18483/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18484/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18485/// are not written to memory).
18486///
18487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18488#[inline]
18489#[target_feature(enable = "avx512f,avx512vl")]
18490#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18491#[rustc_legacy_const_generics(4)]
18492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18493pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18494 base_addr: *mut i32,
18495 k: __mmask8,
18496 vindex: __m128i,
18497 a: __m128i,
18498) {
18499 static_assert_imm8_scale!(SCALE);
18500 vpscatterqd_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_i32x4(), SCALE)
18501}
18502
18503/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18504/// indices stored in vindex scaled by scale
18505///
18506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18507#[inline]
18508#[target_feature(enable = "avx512f,avx512vl")]
18509#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18510#[rustc_legacy_const_generics(3)]
18511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18512pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18513 base_addr: *mut i64,
18514 vindex: __m128i,
18515 a: __m128i,
18516) {
18517 static_assert_imm8_scale!(SCALE);
18518 vpscatterqq_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_i64x2(), SCALE)
18519}
18520
18521/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18522/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18523/// are not written to memory).
18524///
18525/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18526#[inline]
18527#[target_feature(enable = "avx512f,avx512vl")]
18528#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18529#[rustc_legacy_const_generics(4)]
18530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18531pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18532 base_addr: *mut i64,
18533 k: __mmask8,
18534 vindex: __m128i,
18535 a: __m128i,
18536) {
18537 static_assert_imm8_scale!(SCALE);
18538 vpscatterqq_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_i64x2(), SCALE)
18539}
18540
18541/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18542/// at packed 64-bit integer indices stored in vindex scaled by scale
18543///
18544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18545#[inline]
18546#[target_feature(enable = "avx512f,avx512vl")]
18547#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18548#[rustc_legacy_const_generics(3)]
18549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18550pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18551 base_addr: *mut f64,
18552 vindex: __m128i,
18553 a: __m128d,
18554) {
18555 static_assert_imm8_scale!(SCALE);
18556 vscatterqpd_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_f64x2(), SCALE)
18557}
18558
18559/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18560/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18561/// mask bit is not set are not written to memory).
18562///
18563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18564#[inline]
18565#[target_feature(enable = "avx512f,avx512vl")]
18566#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18567#[rustc_legacy_const_generics(4)]
18568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18569pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18570 base_addr: *mut f64,
18571 k: __mmask8,
18572 vindex: __m128i,
18573 a: __m128d,
18574) {
18575 static_assert_imm8_scale!(SCALE);
18576 vscatterqpd_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_f64x2(), SCALE)
18577}
18578
18579/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18580/// at packed 64-bit integer indices stored in vindex scaled by scale
18581///
18582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18583#[inline]
18584#[target_feature(enable = "avx512f,avx512vl")]
18585#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18586#[rustc_legacy_const_generics(3)]
18587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18588pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18589 static_assert_imm8_scale!(SCALE);
18590 vscatterqps_128(slice:base_addr as _, k:0xff, offsets:vindex.as_i64x2(), src:a.as_f32x4(), SCALE)
18591}
18592
18593/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18594/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18595///
18596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18597#[inline]
18598#[target_feature(enable = "avx512f,avx512vl")]
18599#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18600#[rustc_legacy_const_generics(4)]
18601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18602pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18603 base_addr: *mut f32,
18604 k: __mmask8,
18605 vindex: __m128i,
18606 a: __m128,
18607) {
18608 static_assert_imm8_scale!(SCALE);
18609 vscatterqps_128(slice:base_addr as _, k, offsets:vindex.as_i64x2(), src:a.as_f32x4(), SCALE)
18610}
18611
18612/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18613/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18614/// mask bit is not set).
18615///
18616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18617#[inline]
18618#[target_feature(enable = "avx512f,avx512vl")]
18619#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18620#[rustc_legacy_const_generics(4)]
18621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18622pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18623 src: __m128i,
18624 k: __mmask8,
18625 vindex: __m128i,
18626 base_addr: *const i32,
18627) -> __m128i {
18628 static_assert_imm8_scale!(SCALE);
18629 transmute(src:vpgatherdd_128(
18630 src.as_i32x4(),
18631 slice:base_addr as _,
18632 offsets:vindex.as_i32x4(),
18633 k,
18634 SCALE,
18635 ))
18636}
18637
18638/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18639/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18640/// mask bit is not set).
18641///
18642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18643#[inline]
18644#[target_feature(enable = "avx512f,avx512vl")]
18645#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18646#[rustc_legacy_const_generics(4)]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18649 src: __m128i,
18650 k: __mmask8,
18651 vindex: __m128i,
18652 base_addr: *const i64,
18653) -> __m128i {
18654 static_assert_imm8_scale!(SCALE);
18655 transmute(src:vpgatherdq_128(
18656 src.as_i64x2(),
18657 slice:base_addr as _,
18658 offsets:vindex.as_i32x4(),
18659 k,
18660 SCALE,
18661 ))
18662}
18663
18664/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18665/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18666/// from src when the corresponding mask bit is not set).
18667///
18668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18669#[inline]
18670#[target_feature(enable = "avx512f,avx512vl")]
18671#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18672#[rustc_legacy_const_generics(4)]
18673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18674pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18675 src: __m128d,
18676 k: __mmask8,
18677 vindex: __m128i,
18678 base_addr: *const f64,
18679) -> __m128d {
18680 static_assert_imm8_scale!(SCALE);
18681 transmute(src:vgatherdpd_128(
18682 src.as_f64x2(),
18683 slice:base_addr as _,
18684 offsets:vindex.as_i32x4(),
18685 k,
18686 SCALE,
18687 ))
18688}
18689
18690/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18691/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18692/// from src when the corresponding mask bit is not set).
18693///
18694/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18695#[inline]
18696#[target_feature(enable = "avx512f,avx512vl")]
18697#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18698#[rustc_legacy_const_generics(4)]
18699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18700pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18701 src: __m128,
18702 k: __mmask8,
18703 vindex: __m128i,
18704 base_addr: *const f32,
18705) -> __m128 {
18706 static_assert_imm8_scale!(SCALE);
18707 transmute(src:vgatherdps_128(
18708 src.as_f32x4(),
18709 slice:base_addr as _,
18710 offsets:vindex.as_i32x4(),
18711 k,
18712 SCALE,
18713 ))
18714}
18715
18716/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18717/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18718/// mask bit is not set).
18719///
18720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18721#[inline]
18722#[target_feature(enable = "avx512f,avx512vl")]
18723#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18724#[rustc_legacy_const_generics(4)]
18725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18726pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18727 src: __m128i,
18728 k: __mmask8,
18729 vindex: __m128i,
18730 base_addr: *const i32,
18731) -> __m128i {
18732 static_assert_imm8_scale!(SCALE);
18733 transmute(src:vpgatherqd_128(
18734 src.as_i32x4(),
18735 slice:base_addr as _,
18736 offsets:vindex.as_i64x2(),
18737 k,
18738 SCALE,
18739 ))
18740}
18741
18742/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18743/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18744/// mask bit is not set).
18745///
18746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18747#[inline]
18748#[target_feature(enable = "avx512f,avx512vl")]
18749#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18750#[rustc_legacy_const_generics(4)]
18751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18752pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18753 src: __m128i,
18754 k: __mmask8,
18755 vindex: __m128i,
18756 base_addr: *const i64,
18757) -> __m128i {
18758 static_assert_imm8_scale!(SCALE);
18759 transmute(src:vpgatherqq_128(
18760 src.as_i64x2(),
18761 slice:base_addr as _,
18762 offsets:vindex.as_i64x2(),
18763 k,
18764 SCALE,
18765 ))
18766}
18767
18768/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18769/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18770/// from src when the corresponding mask bit is not set).
18771///
18772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18773#[inline]
18774#[target_feature(enable = "avx512f,avx512vl")]
18775#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18776#[rustc_legacy_const_generics(4)]
18777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18778pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18779 src: __m128d,
18780 k: __mmask8,
18781 vindex: __m128i,
18782 base_addr: *const f64,
18783) -> __m128d {
18784 static_assert_imm8_scale!(SCALE);
18785 transmute(src:vgatherqpd_128(
18786 src.as_f64x2(),
18787 slice:base_addr as _,
18788 offsets:vindex.as_i64x2(),
18789 k,
18790 SCALE,
18791 ))
18792}
18793
18794/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18795/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18796/// from src when the corresponding mask bit is not set).
18797///
18798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18802#[rustc_legacy_const_generics(4)]
18803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18804pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18805 src: __m128,
18806 k: __mmask8,
18807 vindex: __m128i,
18808 base_addr: *const f32,
18809) -> __m128 {
18810 static_assert_imm8_scale!(SCALE);
18811 transmute(src:vgatherqps_128(
18812 src.as_f32x4(),
18813 slice:base_addr as _,
18814 offsets:vindex.as_i64x2(),
18815 k,
18816 SCALE,
18817 ))
18818}
18819
18820/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18821///
18822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18823#[inline]
18824#[target_feature(enable = "avx512f")]
18825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18826#[cfg_attr(test, assert_instr(vpcompressd))]
18827pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18828 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
18829}
18830
18831/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18832///
18833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18834#[inline]
18835#[target_feature(enable = "avx512f")]
18836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18837#[cfg_attr(test, assert_instr(vpcompressd))]
18838pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18839 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
18840}
18841
18842/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18843///
18844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18845#[inline]
18846#[target_feature(enable = "avx512f,avx512vl")]
18847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18848#[cfg_attr(test, assert_instr(vpcompressd))]
18849pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18850 unsafe { transmute(src:vpcompressd256(a.as_i32x8(), src.as_i32x8(), mask:k)) }
18851}
18852
18853/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18854///
18855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18856#[inline]
18857#[target_feature(enable = "avx512f,avx512vl")]
18858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18859#[cfg_attr(test, assert_instr(vpcompressd))]
18860pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18861 unsafe { transmute(src:vpcompressd256(a.as_i32x8(), src:i32x8::ZERO, mask:k)) }
18862}
18863
18864/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18865///
18866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18867#[inline]
18868#[target_feature(enable = "avx512f,avx512vl")]
18869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18870#[cfg_attr(test, assert_instr(vpcompressd))]
18871pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18872 unsafe { transmute(src:vpcompressd128(a.as_i32x4(), src.as_i32x4(), mask:k)) }
18873}
18874
18875/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18876///
18877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18878#[inline]
18879#[target_feature(enable = "avx512f,avx512vl")]
18880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18881#[cfg_attr(test, assert_instr(vpcompressd))]
18882pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18883 unsafe { transmute(src:vpcompressd128(a.as_i32x4(), src:i32x4::ZERO, mask:k)) }
18884}
18885
18886/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18887///
18888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18889#[inline]
18890#[target_feature(enable = "avx512f")]
18891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18892#[cfg_attr(test, assert_instr(vpcompressq))]
18893pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18894 unsafe { transmute(src:vpcompressq(a.as_i64x8(), src.as_i64x8(), mask:k)) }
18895}
18896
18897/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18898///
18899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18900#[inline]
18901#[target_feature(enable = "avx512f")]
18902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18903#[cfg_attr(test, assert_instr(vpcompressq))]
18904pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18905 unsafe { transmute(src:vpcompressq(a.as_i64x8(), src:i64x8::ZERO, mask:k)) }
18906}
18907
18908/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18909///
18910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18911#[inline]
18912#[target_feature(enable = "avx512f,avx512vl")]
18913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18914#[cfg_attr(test, assert_instr(vpcompressq))]
18915pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18916 unsafe { transmute(src:vpcompressq256(a.as_i64x4(), src.as_i64x4(), mask:k)) }
18917}
18918
18919/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18920///
18921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18922#[inline]
18923#[target_feature(enable = "avx512f,avx512vl")]
18924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18925#[cfg_attr(test, assert_instr(vpcompressq))]
18926pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18927 unsafe { transmute(src:vpcompressq256(a.as_i64x4(), src:i64x4::ZERO, mask:k)) }
18928}
18929
18930/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18931///
18932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18933#[inline]
18934#[target_feature(enable = "avx512f,avx512vl")]
18935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18936#[cfg_attr(test, assert_instr(vpcompressq))]
18937pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18938 unsafe { transmute(src:vpcompressq128(a.as_i64x2(), src.as_i64x2(), mask:k)) }
18939}
18940
18941/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18942///
18943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18944#[inline]
18945#[target_feature(enable = "avx512f,avx512vl")]
18946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18947#[cfg_attr(test, assert_instr(vpcompressq))]
18948pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18949 unsafe { transmute(src:vpcompressq128(a.as_i64x2(), src:i64x2::ZERO, mask:k)) }
18950}
18951
18952/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18953///
18954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18955#[inline]
18956#[target_feature(enable = "avx512f")]
18957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18958#[cfg_attr(test, assert_instr(vcompressps))]
18959pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18960 unsafe { transmute(src:vcompressps(a.as_f32x16(), src.as_f32x16(), mask:k)) }
18961}
18962
18963/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18964///
18965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18966#[inline]
18967#[target_feature(enable = "avx512f")]
18968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18969#[cfg_attr(test, assert_instr(vcompressps))]
18970pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18971 unsafe { transmute(src:vcompressps(a.as_f32x16(), src:f32x16::ZERO, mask:k)) }
18972}
18973
18974/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18975///
18976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18977#[inline]
18978#[target_feature(enable = "avx512f,avx512vl")]
18979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18980#[cfg_attr(test, assert_instr(vcompressps))]
18981pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18982 unsafe { transmute(src:vcompressps256(a.as_f32x8(), src.as_f32x8(), mask:k)) }
18983}
18984
18985/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18986///
18987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18988#[inline]
18989#[target_feature(enable = "avx512f,avx512vl")]
18990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18991#[cfg_attr(test, assert_instr(vcompressps))]
18992pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18993 unsafe { transmute(src:vcompressps256(a.as_f32x8(), src:f32x8::ZERO, mask:k)) }
18994}
18995
18996/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18997///
18998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18999#[inline]
19000#[target_feature(enable = "avx512f,avx512vl")]
19001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19002#[cfg_attr(test, assert_instr(vcompressps))]
19003pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19004 unsafe { transmute(src:vcompressps128(a.as_f32x4(), src.as_f32x4(), mask:k)) }
19005}
19006
19007/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19008///
19009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
19010#[inline]
19011#[target_feature(enable = "avx512f,avx512vl")]
19012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19013#[cfg_attr(test, assert_instr(vcompressps))]
19014pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
19015 unsafe { transmute(src:vcompressps128(a.as_f32x4(), src:f32x4::ZERO, mask:k)) }
19016}
19017
19018/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19019///
19020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
19021#[inline]
19022#[target_feature(enable = "avx512f")]
19023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19024#[cfg_attr(test, assert_instr(vcompresspd))]
19025pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19026 unsafe { transmute(src:vcompresspd(a.as_f64x8(), src.as_f64x8(), mask:k)) }
19027}
19028
19029/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19030///
19031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
19032#[inline]
19033#[target_feature(enable = "avx512f")]
19034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19035#[cfg_attr(test, assert_instr(vcompresspd))]
19036pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
19037 unsafe { transmute(src:vcompresspd(a.as_f64x8(), src:f64x8::ZERO, mask:k)) }
19038}
19039
19040/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19041///
19042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
19043#[inline]
19044#[target_feature(enable = "avx512f,avx512vl")]
19045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19046#[cfg_attr(test, assert_instr(vcompresspd))]
19047pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19048 unsafe { transmute(src:vcompresspd256(a.as_f64x4(), src.as_f64x4(), mask:k)) }
19049}
19050
19051/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19052///
19053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
19054#[inline]
19055#[target_feature(enable = "avx512f,avx512vl")]
19056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19057#[cfg_attr(test, assert_instr(vcompresspd))]
19058pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
19059 unsafe { transmute(src:vcompresspd256(a.as_f64x4(), src:f64x4::ZERO, mask:k)) }
19060}
19061
19062/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
19063///
19064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
19065#[inline]
19066#[target_feature(enable = "avx512f,avx512vl")]
19067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19068#[cfg_attr(test, assert_instr(vcompresspd))]
19069pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19070 unsafe { transmute(src:vcompresspd128(a.as_f64x2(), src.as_f64x2(), mask:k)) }
19071}
19072
19073/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19074///
19075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
19076#[inline]
19077#[target_feature(enable = "avx512f,avx512vl")]
19078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19079#[cfg_attr(test, assert_instr(vcompresspd))]
19080pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
19081 unsafe { transmute(src:vcompresspd128(a.as_f64x2(), src:f64x2::ZERO, mask:k)) }
19082}
19083
19084/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19085///
19086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
19087#[inline]
19088#[target_feature(enable = "avx512f")]
19089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19090#[cfg_attr(test, assert_instr(vpcompressd))]
19091pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
19092 vcompressstored(mem:base_addr as *mut _, data:a.as_i32x16(), mask:k)
19093}
19094
19095/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19096///
19097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
19098#[inline]
19099#[target_feature(enable = "avx512f,avx512vl")]
19100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19101#[cfg_attr(test, assert_instr(vpcompressd))]
19102pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
19103 vcompressstored256(mem:base_addr as *mut _, data:a.as_i32x8(), mask:k)
19104}
19105
19106/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19107///
19108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
19109#[inline]
19110#[target_feature(enable = "avx512f,avx512vl")]
19111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19112#[cfg_attr(test, assert_instr(vpcompressd))]
19113pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
19114 vcompressstored128(mem:base_addr as *mut _, data:a.as_i32x4(), mask:k)
19115}
19116
19117/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19118///
19119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
19120#[inline]
19121#[target_feature(enable = "avx512f")]
19122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19123#[cfg_attr(test, assert_instr(vpcompressq))]
19124pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
19125 vcompressstoreq(mem:base_addr as *mut _, data:a.as_i64x8(), mask:k)
19126}
19127
19128/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19129///
19130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
19131#[inline]
19132#[target_feature(enable = "avx512f,avx512vl")]
19133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19134#[cfg_attr(test, assert_instr(vpcompressq))]
19135pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
19136 vcompressstoreq256(mem:base_addr as *mut _, data:a.as_i64x4(), mask:k)
19137}
19138
19139/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vpcompressq))]
19146pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
19147 vcompressstoreq128(mem:base_addr as *mut _, data:a.as_i64x2(), mask:k)
19148}
19149
19150/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19156#[cfg_attr(test, assert_instr(vcompressps))]
19157pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
19158 vcompressstoreps(mem:base_addr as *mut _, data:a.as_f32x16(), mask:k)
19159}
19160
19161/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19162///
19163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
19164#[inline]
19165#[target_feature(enable = "avx512f,avx512vl")]
19166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19167#[cfg_attr(test, assert_instr(vcompressps))]
19168pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
19169 vcompressstoreps256(mem:base_addr as *mut _, data:a.as_f32x8(), mask:k)
19170}
19171
19172/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19173///
19174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
19175#[inline]
19176#[target_feature(enable = "avx512f,avx512vl")]
19177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19178#[cfg_attr(test, assert_instr(vcompressps))]
19179pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
19180 vcompressstoreps128(mem:base_addr as *mut _, data:a.as_f32x4(), mask:k)
19181}
19182
19183/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19184///
19185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
19186#[inline]
19187#[target_feature(enable = "avx512f")]
19188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19189#[cfg_attr(test, assert_instr(vcompresspd))]
19190pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
19191 vcompressstorepd(mem:base_addr as *mut _, data:a.as_f64x8(), mask:k)
19192}
19193
19194/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19195///
19196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
19197#[inline]
19198#[target_feature(enable = "avx512f,avx512vl")]
19199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19200#[cfg_attr(test, assert_instr(vcompresspd))]
19201pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
19202 vcompressstorepd256(mem:base_addr as *mut _, data:a.as_f64x4(), mask:k)
19203}
19204
19205/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
19206///
19207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
19208#[inline]
19209#[target_feature(enable = "avx512f,avx512vl")]
19210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19211#[cfg_attr(test, assert_instr(vcompresspd))]
19212pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
19213 vcompressstorepd128(mem:base_addr as *mut _, data:a.as_f64x2(), mask:k)
19214}
19215
19216/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19217///
19218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
19219#[inline]
19220#[target_feature(enable = "avx512f")]
19221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19222#[cfg_attr(test, assert_instr(vpexpandd))]
19223pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19224 unsafe { transmute(src:vpexpandd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
19225}
19226
19227/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19228///
19229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
19230#[inline]
19231#[target_feature(enable = "avx512f")]
19232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19233#[cfg_attr(test, assert_instr(vpexpandd))]
19234pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
19235 unsafe { transmute(src:vpexpandd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
19236}
19237
19238/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19239///
19240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
19241#[inline]
19242#[target_feature(enable = "avx512f,avx512vl")]
19243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19244#[cfg_attr(test, assert_instr(vpexpandd))]
19245pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19246 unsafe { transmute(src:vpexpandd256(a.as_i32x8(), src.as_i32x8(), mask:k)) }
19247}
19248
19249/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19250///
19251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
19252#[inline]
19253#[target_feature(enable = "avx512f,avx512vl")]
19254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19255#[cfg_attr(test, assert_instr(vpexpandd))]
19256pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
19257 unsafe { transmute(src:vpexpandd256(a.as_i32x8(), src:i32x8::ZERO, mask:k)) }
19258}
19259
19260/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19261///
19262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
19263#[inline]
19264#[target_feature(enable = "avx512f,avx512vl")]
19265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19266#[cfg_attr(test, assert_instr(vpexpandd))]
19267pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19268 unsafe { transmute(src:vpexpandd128(a.as_i32x4(), src.as_i32x4(), mask:k)) }
19269}
19270
19271/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19272///
19273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
19274#[inline]
19275#[target_feature(enable = "avx512f,avx512vl")]
19276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19277#[cfg_attr(test, assert_instr(vpexpandd))]
19278pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
19279 unsafe { transmute(src:vpexpandd128(a.as_i32x4(), src:i32x4::ZERO, mask:k)) }
19280}
19281
19282/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19283///
19284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
19285#[inline]
19286#[target_feature(enable = "avx512f")]
19287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19288#[cfg_attr(test, assert_instr(vpexpandq))]
19289pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19290 unsafe { transmute(src:vpexpandq(a.as_i64x8(), src.as_i64x8(), mask:k)) }
19291}
19292
19293/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19294///
19295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
19296#[inline]
19297#[target_feature(enable = "avx512f")]
19298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19299#[cfg_attr(test, assert_instr(vpexpandq))]
19300pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
19301 unsafe { transmute(src:vpexpandq(a.as_i64x8(), src:i64x8::ZERO, mask:k)) }
19302}
19303
19304/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19305///
19306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
19307#[inline]
19308#[target_feature(enable = "avx512f,avx512vl")]
19309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19310#[cfg_attr(test, assert_instr(vpexpandq))]
19311pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19312 unsafe { transmute(src:vpexpandq256(a.as_i64x4(), src.as_i64x4(), mask:k)) }
19313}
19314
19315/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19316///
19317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
19318#[inline]
19319#[target_feature(enable = "avx512f,avx512vl")]
19320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19321#[cfg_attr(test, assert_instr(vpexpandq))]
19322pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
19323 unsafe { transmute(src:vpexpandq256(a.as_i64x4(), src:i64x4::ZERO, mask:k)) }
19324}
19325
19326/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vpexpandq))]
19333pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19334 unsafe { transmute(src:vpexpandq128(a.as_i64x2(), src.as_i64x2(), mask:k)) }
19335}
19336
19337/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
19340#[inline]
19341#[target_feature(enable = "avx512f,avx512vl")]
19342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19343#[cfg_attr(test, assert_instr(vpexpandq))]
19344pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
19345 unsafe { transmute(src:vpexpandq128(a.as_i64x2(), src:i64x2::ZERO, mask:k)) }
19346}
19347
19348/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19349///
19350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
19351#[inline]
19352#[target_feature(enable = "avx512f")]
19353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19354#[cfg_attr(test, assert_instr(vexpandps))]
19355pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
19356 unsafe { transmute(src:vexpandps(a.as_f32x16(), src.as_f32x16(), mask:k)) }
19357}
19358
19359/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19360///
19361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
19362#[inline]
19363#[target_feature(enable = "avx512f")]
19364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19365#[cfg_attr(test, assert_instr(vexpandps))]
19366pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
19367 unsafe { transmute(src:vexpandps(a.as_f32x16(), src:f32x16::ZERO, mask:k)) }
19368}
19369
19370/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19371///
19372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
19373#[inline]
19374#[target_feature(enable = "avx512f,avx512vl")]
19375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19376#[cfg_attr(test, assert_instr(vexpandps))]
19377pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
19378 unsafe { transmute(src:vexpandps256(a.as_f32x8(), src.as_f32x8(), mask:k)) }
19379}
19380
19381/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19382///
19383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
19384#[inline]
19385#[target_feature(enable = "avx512f,avx512vl")]
19386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19387#[cfg_attr(test, assert_instr(vexpandps))]
19388pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
19389 unsafe { transmute(src:vexpandps256(a.as_f32x8(), src:f32x8::ZERO, mask:k)) }
19390}
19391
19392/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19393///
19394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
19395#[inline]
19396#[target_feature(enable = "avx512f,avx512vl")]
19397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19398#[cfg_attr(test, assert_instr(vexpandps))]
19399pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19400 unsafe { transmute(src:vexpandps128(a.as_f32x4(), src.as_f32x4(), mask:k)) }
19401}
19402
19403/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19404///
19405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
19406#[inline]
19407#[target_feature(enable = "avx512f,avx512vl")]
19408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19409#[cfg_attr(test, assert_instr(vexpandps))]
19410pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19411 unsafe { transmute(src:vexpandps128(a.as_f32x4(), src:f32x4::ZERO, mask:k)) }
19412}
19413
19414/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19415///
19416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19417#[inline]
19418#[target_feature(enable = "avx512f")]
19419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19420#[cfg_attr(test, assert_instr(vexpandpd))]
19421pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19422 unsafe { transmute(src:vexpandpd(a.as_f64x8(), src.as_f64x8(), mask:k)) }
19423}
19424
19425/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19426///
19427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19428#[inline]
19429#[target_feature(enable = "avx512f")]
19430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19431#[cfg_attr(test, assert_instr(vexpandpd))]
19432pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19433 unsafe { transmute(src:vexpandpd(a.as_f64x8(), src:f64x8::ZERO, mask:k)) }
19434}
19435
19436/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19437///
19438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19439#[inline]
19440#[target_feature(enable = "avx512f,avx512vl")]
19441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19442#[cfg_attr(test, assert_instr(vexpandpd))]
19443pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19444 unsafe { transmute(src:vexpandpd256(a.as_f64x4(), src.as_f64x4(), mask:k)) }
19445}
19446
19447/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19448///
19449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19450#[inline]
19451#[target_feature(enable = "avx512f,avx512vl")]
19452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19453#[cfg_attr(test, assert_instr(vexpandpd))]
19454pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19455 unsafe { transmute(src:vexpandpd256(a.as_f64x4(), src:f64x4::ZERO, mask:k)) }
19456}
19457
19458/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19459///
19460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19461#[inline]
19462#[target_feature(enable = "avx512f,avx512vl")]
19463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19464#[cfg_attr(test, assert_instr(vexpandpd))]
19465pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19466 unsafe { transmute(src:vexpandpd128(a.as_f64x2(), src.as_f64x2(), mask:k)) }
19467}
19468
19469/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19470///
19471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19472#[inline]
19473#[target_feature(enable = "avx512f,avx512vl")]
19474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19475#[cfg_attr(test, assert_instr(vexpandpd))]
19476pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19477 unsafe { transmute(src:vexpandpd128(a.as_f64x2(), src:f64x2::ZERO, mask:k)) }
19478}
19479
19480/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19481///
19482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19483#[inline]
19484#[target_feature(enable = "avx512f")]
19485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19486#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19487#[rustc_legacy_const_generics(1)]
19488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19489pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19490 static_assert_uimm_bits!(IMM8, 8);
19491 _mm512_rolv_epi32(a, b:_mm512_set1_epi32(IMM8))
19492}
19493
19494/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19495///
19496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19497#[inline]
19498#[target_feature(enable = "avx512f")]
19499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19500#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19501#[rustc_legacy_const_generics(3)]
19502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19503pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
19504 src: __m512i,
19505 k: __mmask16,
19506 a: __m512i,
19507) -> __m512i {
19508 static_assert_uimm_bits!(IMM8, 8);
19509 _mm512_mask_rolv_epi32(src, k, a, b:_mm512_set1_epi32(IMM8))
19510}
19511
19512/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19513///
19514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19515#[inline]
19516#[target_feature(enable = "avx512f")]
19517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19518#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19519#[rustc_legacy_const_generics(2)]
19520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19521pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19522 static_assert_uimm_bits!(IMM8, 8);
19523 _mm512_maskz_rolv_epi32(k, a, b:_mm512_set1_epi32(IMM8))
19524}
19525
19526/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19527///
19528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19529#[inline]
19530#[target_feature(enable = "avx512f,avx512vl")]
19531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19532#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19533#[rustc_legacy_const_generics(1)]
19534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19535pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19536 static_assert_uimm_bits!(IMM8, 8);
19537 _mm256_rolv_epi32(a, b:_mm256_set1_epi32(IMM8))
19538}
19539
19540/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19541///
19542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19543#[inline]
19544#[target_feature(enable = "avx512f,avx512vl")]
19545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19546#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19547#[rustc_legacy_const_generics(3)]
19548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19549pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
19550 src: __m256i,
19551 k: __mmask8,
19552 a: __m256i,
19553) -> __m256i {
19554 static_assert_uimm_bits!(IMM8, 8);
19555 _mm256_mask_rolv_epi32(src, k, a, b:_mm256_set1_epi32(IMM8))
19556}
19557
19558/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19564#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19565#[rustc_legacy_const_generics(2)]
19566#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19567pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19568 static_assert_uimm_bits!(IMM8, 8);
19569 _mm256_maskz_rolv_epi32(k, a, b:_mm256_set1_epi32(IMM8))
19570}
19571
19572/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19573///
19574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19575#[inline]
19576#[target_feature(enable = "avx512f,avx512vl")]
19577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19578#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19579#[rustc_legacy_const_generics(1)]
19580#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19581pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19582 static_assert_uimm_bits!(IMM8, 8);
19583 _mm_rolv_epi32(a, b:_mm_set1_epi32(IMM8))
19584}
19585
19586/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19587///
19588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19589#[inline]
19590#[target_feature(enable = "avx512f,avx512vl")]
19591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19592#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19593#[rustc_legacy_const_generics(3)]
19594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19595pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19596 static_assert_uimm_bits!(IMM8, 8);
19597 _mm_mask_rolv_epi32(src, k, a, b:_mm_set1_epi32(IMM8))
19598}
19599
19600/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19601///
19602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19603#[inline]
19604#[target_feature(enable = "avx512f,avx512vl")]
19605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19606#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19607#[rustc_legacy_const_generics(2)]
19608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19609pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19610 static_assert_uimm_bits!(IMM8, 8);
19611 _mm_maskz_rolv_epi32(k, a, b:_mm_set1_epi32(IMM8))
19612}
19613
19614/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19615///
19616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19617#[inline]
19618#[target_feature(enable = "avx512f")]
19619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19620#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19621#[rustc_legacy_const_generics(1)]
19622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19623pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19624 static_assert_uimm_bits!(IMM8, 8);
19625 _mm512_rorv_epi32(a, b:_mm512_set1_epi32(IMM8))
19626}
19627
19628/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19634#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19635#[rustc_legacy_const_generics(3)]
19636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19637pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
19638 src: __m512i,
19639 k: __mmask16,
19640 a: __m512i,
19641) -> __m512i {
19642 static_assert_uimm_bits!(IMM8, 8);
19643 _mm512_mask_rorv_epi32(src, k, a, b:_mm512_set1_epi32(IMM8))
19644}
19645
19646/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19647///
19648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19649#[inline]
19650#[target_feature(enable = "avx512f")]
19651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19652#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19653#[rustc_legacy_const_generics(2)]
19654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19655pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19656 static_assert_uimm_bits!(IMM8, 8);
19657 _mm512_maskz_rorv_epi32(k, a, b:_mm512_set1_epi32(IMM8))
19658}
19659
19660/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19661///
19662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19663#[inline]
19664#[target_feature(enable = "avx512f,avx512vl")]
19665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19666#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19667#[rustc_legacy_const_generics(1)]
19668#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19669pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19670 static_assert_uimm_bits!(IMM8, 8);
19671 _mm256_rorv_epi32(a, b:_mm256_set1_epi32(IMM8))
19672}
19673
19674/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19675///
19676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19677#[inline]
19678#[target_feature(enable = "avx512f,avx512vl")]
19679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19680#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19681#[rustc_legacy_const_generics(3)]
19682#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19683pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
19684 src: __m256i,
19685 k: __mmask8,
19686 a: __m256i,
19687) -> __m256i {
19688 static_assert_uimm_bits!(IMM8, 8);
19689 _mm256_mask_rorv_epi32(src, k, a, b:_mm256_set1_epi32(IMM8))
19690}
19691
19692/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19693///
19694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19695#[inline]
19696#[target_feature(enable = "avx512f,avx512vl")]
19697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19698#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19699#[rustc_legacy_const_generics(2)]
19700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19701pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19702 static_assert_uimm_bits!(IMM8, 8);
19703 _mm256_maskz_rorv_epi32(k, a, b:_mm256_set1_epi32(IMM8))
19704}
19705
19706/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19707///
19708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19709#[inline]
19710#[target_feature(enable = "avx512f,avx512vl")]
19711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19712#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19713#[rustc_legacy_const_generics(1)]
19714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19715pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19716 static_assert_uimm_bits!(IMM8, 8);
19717 _mm_rorv_epi32(a, b:_mm_set1_epi32(IMM8))
19718}
19719
19720/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19721///
19722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19723#[inline]
19724#[target_feature(enable = "avx512f,avx512vl")]
19725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19726#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19727#[rustc_legacy_const_generics(3)]
19728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19729pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19730 static_assert_uimm_bits!(IMM8, 8);
19731 _mm_mask_rorv_epi32(src, k, a, b:_mm_set1_epi32(IMM8))
19732}
19733
19734/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19735///
19736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19737#[inline]
19738#[target_feature(enable = "avx512f,avx512vl")]
19739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19740#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19741#[rustc_legacy_const_generics(2)]
19742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19743pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19744 static_assert_uimm_bits!(IMM8, 8);
19745 _mm_maskz_rorv_epi32(k, a, b:_mm_set1_epi32(IMM8))
19746}
19747
19748/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19754#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19757pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19758 static_assert_uimm_bits!(IMM8, 8);
19759 _mm512_rolv_epi64(a, b:_mm512_set1_epi64(IMM8 as i64))
19760}
19761
19762/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19765#[inline]
19766#[target_feature(enable = "avx512f")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19769#[rustc_legacy_const_generics(3)]
19770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19771pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
19772 src: __m512i,
19773 k: __mmask8,
19774 a: __m512i,
19775) -> __m512i {
19776 static_assert_uimm_bits!(IMM8, 8);
19777 _mm512_mask_rolv_epi64(src, k, a, b:_mm512_set1_epi64(IMM8 as i64))
19778}
19779
19780/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19781///
19782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19783#[inline]
19784#[target_feature(enable = "avx512f")]
19785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19786#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19787#[rustc_legacy_const_generics(2)]
19788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19789pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19790 static_assert_uimm_bits!(IMM8, 8);
19791 _mm512_maskz_rolv_epi64(k, a, b:_mm512_set1_epi64(IMM8 as i64))
19792}
19793
19794/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19795///
19796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19797#[inline]
19798#[target_feature(enable = "avx512f,avx512vl")]
19799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19800#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19801#[rustc_legacy_const_generics(1)]
19802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19803pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19804 static_assert_uimm_bits!(IMM8, 8);
19805 _mm256_rolv_epi64(a, b:_mm256_set1_epi64x(IMM8 as i64))
19806}
19807
19808/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19809///
19810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19811#[inline]
19812#[target_feature(enable = "avx512f,avx512vl")]
19813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19814#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19815#[rustc_legacy_const_generics(3)]
19816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19817pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
19818 src: __m256i,
19819 k: __mmask8,
19820 a: __m256i,
19821) -> __m256i {
19822 static_assert_uimm_bits!(IMM8, 8);
19823 _mm256_mask_rolv_epi64(src, k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19824}
19825
19826/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19827///
19828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19829#[inline]
19830#[target_feature(enable = "avx512f,avx512vl")]
19831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19832#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19833#[rustc_legacy_const_generics(2)]
19834#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19835pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19836 static_assert_uimm_bits!(IMM8, 8);
19837 _mm256_maskz_rolv_epi64(k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19838}
19839
19840/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19841///
19842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19843#[inline]
19844#[target_feature(enable = "avx512f,avx512vl")]
19845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19846#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19847#[rustc_legacy_const_generics(1)]
19848#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19849pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19850 static_assert_uimm_bits!(IMM8, 8);
19851 _mm_rolv_epi64(a, b:_mm_set1_epi64x(IMM8 as i64))
19852}
19853
19854/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19855///
19856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19857#[inline]
19858#[target_feature(enable = "avx512f,avx512vl")]
19859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19860#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19861#[rustc_legacy_const_generics(3)]
19862#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19863pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19864 static_assert_uimm_bits!(IMM8, 8);
19865 _mm_mask_rolv_epi64(src, k, a, b:_mm_set1_epi64x(IMM8 as i64))
19866}
19867
19868/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19869///
19870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19871#[inline]
19872#[target_feature(enable = "avx512f,avx512vl")]
19873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19874#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19875#[rustc_legacy_const_generics(2)]
19876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19877pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19878 static_assert_uimm_bits!(IMM8, 8);
19879 _mm_maskz_rolv_epi64(k, a, b:_mm_set1_epi64x(IMM8 as i64))
19880}
19881
19882/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19883///
19884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19885#[inline]
19886#[target_feature(enable = "avx512f")]
19887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19888#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19889#[rustc_legacy_const_generics(1)]
19890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19891pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19892 static_assert_uimm_bits!(IMM8, 8);
19893 _mm512_rorv_epi64(a, b:_mm512_set1_epi64(IMM8 as i64))
19894}
19895
19896/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19897///
19898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19899#[inline]
19900#[target_feature(enable = "avx512f")]
19901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19902#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19903#[rustc_legacy_const_generics(3)]
19904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19905pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
19906 src: __m512i,
19907 k: __mmask8,
19908 a: __m512i,
19909) -> __m512i {
19910 static_assert_uimm_bits!(IMM8, 8);
19911 _mm512_mask_rorv_epi64(src, k, a, b:_mm512_set1_epi64(IMM8 as i64))
19912}
19913
19914/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19915///
19916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19917#[inline]
19918#[target_feature(enable = "avx512f")]
19919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19920#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19921#[rustc_legacy_const_generics(2)]
19922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19923pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19924 static_assert_uimm_bits!(IMM8, 8);
19925 _mm512_maskz_rorv_epi64(k, a, b:_mm512_set1_epi64(IMM8 as i64))
19926}
19927
19928/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19929///
19930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19931#[inline]
19932#[target_feature(enable = "avx512f,avx512vl")]
19933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19934#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19935#[rustc_legacy_const_generics(1)]
19936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19937pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19938 static_assert_uimm_bits!(IMM8, 8);
19939 _mm256_rorv_epi64(a, b:_mm256_set1_epi64x(IMM8 as i64))
19940}
19941
19942/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19943///
19944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19945#[inline]
19946#[target_feature(enable = "avx512f,avx512vl")]
19947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19948#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19949#[rustc_legacy_const_generics(3)]
19950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19951pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
19952 src: __m256i,
19953 k: __mmask8,
19954 a: __m256i,
19955) -> __m256i {
19956 static_assert_uimm_bits!(IMM8, 8);
19957 _mm256_mask_rorv_epi64(src, k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19958}
19959
19960/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19961///
19962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19963#[inline]
19964#[target_feature(enable = "avx512f,avx512vl")]
19965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19966#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19967#[rustc_legacy_const_generics(2)]
19968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19969pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19970 static_assert_uimm_bits!(IMM8, 8);
19971 _mm256_maskz_rorv_epi64(k, a, b:_mm256_set1_epi64x(IMM8 as i64))
19972}
19973
19974/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19975///
19976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19977#[inline]
19978#[target_feature(enable = "avx512f,avx512vl")]
19979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19980#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19981#[rustc_legacy_const_generics(1)]
19982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19983pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19984 static_assert_uimm_bits!(IMM8, 8);
19985 _mm_rorv_epi64(a, b:_mm_set1_epi64x(IMM8 as i64))
19986}
19987
19988/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19989///
19990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19991#[inline]
19992#[target_feature(enable = "avx512f,avx512vl")]
19993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19994#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19995#[rustc_legacy_const_generics(3)]
19996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19997pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19998 static_assert_uimm_bits!(IMM8, 8);
19999 _mm_mask_rorv_epi64(src, k, a, b:_mm_set1_epi64x(IMM8 as i64))
20000}
20001
20002/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20003///
20004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
20005#[inline]
20006#[target_feature(enable = "avx512f,avx512vl")]
20007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20008#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
20009#[rustc_legacy_const_generics(2)]
20010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20011pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
20012 static_assert_uimm_bits!(IMM8, 8);
20013 _mm_maskz_rorv_epi64(k, a, b:_mm_set1_epi64x(IMM8 as i64))
20014}
20015
20016/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20017///
20018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
20019#[inline]
20020#[target_feature(enable = "avx512f")]
20021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20022#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20023#[rustc_legacy_const_generics(1)]
20024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20025pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20026 unsafe {
20027 static_assert_uimm_bits!(IMM8, 8);
20028 if IMM8 >= 32 {
20029 _mm512_setzero_si512()
20030 } else {
20031 transmute(src:simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8)))
20032 }
20033 }
20034}
20035
20036/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20037///
20038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
20039#[inline]
20040#[target_feature(enable = "avx512f")]
20041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20042#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20043#[rustc_legacy_const_generics(3)]
20044#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20045pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
20046 src: __m512i,
20047 k: __mmask16,
20048 a: __m512i,
20049) -> __m512i {
20050 unsafe {
20051 static_assert_uimm_bits!(IMM8, 8);
20052 let shf: Simd = if IMM8 >= 32 {
20053 u32x16::ZERO
20054 } else {
20055 simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8))
20056 };
20057 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u32x16()))
20058 }
20059}
20060
20061/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20062///
20063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
20064#[inline]
20065#[target_feature(enable = "avx512f")]
20066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20067#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20068#[rustc_legacy_const_generics(2)]
20069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20070pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20071 unsafe {
20072 static_assert_uimm_bits!(IMM8, 8);
20073 if IMM8 >= 32 {
20074 _mm512_setzero_si512()
20075 } else {
20076 let shf: Simd = simd_shl(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8));
20077 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u32x16::ZERO))
20078 }
20079 }
20080}
20081
20082/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20083///
20084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
20085#[inline]
20086#[target_feature(enable = "avx512f,avx512vl")]
20087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20088#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20089#[rustc_legacy_const_generics(3)]
20090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20091pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
20092 src: __m256i,
20093 k: __mmask8,
20094 a: __m256i,
20095) -> __m256i {
20096 unsafe {
20097 static_assert_uimm_bits!(IMM8, 8);
20098 let r: Simd = if IMM8 >= 32 {
20099 u32x8::ZERO
20100 } else {
20101 simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8))
20102 };
20103 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x8()))
20104 }
20105}
20106
20107/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20108///
20109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
20110#[inline]
20111#[target_feature(enable = "avx512f,avx512vl")]
20112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20113#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20114#[rustc_legacy_const_generics(2)]
20115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20116pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20117 unsafe {
20118 static_assert_uimm_bits!(IMM8, 8);
20119 if IMM8 >= 32 {
20120 _mm256_setzero_si256()
20121 } else {
20122 let r: Simd = simd_shl(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8));
20123 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x8::ZERO))
20124 }
20125 }
20126}
20127
20128/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20129///
20130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
20131#[inline]
20132#[target_feature(enable = "avx512f,avx512vl")]
20133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20134#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20135#[rustc_legacy_const_generics(3)]
20136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20137pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
20138 src: __m128i,
20139 k: __mmask8,
20140 a: __m128i,
20141) -> __m128i {
20142 unsafe {
20143 static_assert_uimm_bits!(IMM8, 8);
20144 let r: Simd = if IMM8 >= 32 {
20145 u32x4::ZERO
20146 } else {
20147 simd_shl(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8))
20148 };
20149 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x4()))
20150 }
20151}
20152
20153/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20154///
20155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
20156#[inline]
20157#[target_feature(enable = "avx512f,avx512vl")]
20158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20159#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
20160#[rustc_legacy_const_generics(2)]
20161#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20162pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20163 unsafe {
20164 static_assert_uimm_bits!(IMM8, 8);
20165 if IMM8 >= 32 {
20166 _mm_setzero_si128()
20167 } else {
20168 let r: Simd = simd_shl(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8));
20169 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x4::ZERO))
20170 }
20171 }
20172}
20173
20174/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20175///
20176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
20177#[inline]
20178#[target_feature(enable = "avx512f")]
20179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20180#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20181#[rustc_legacy_const_generics(1)]
20182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20183pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20184 unsafe {
20185 static_assert_uimm_bits!(IMM8, 8);
20186 if IMM8 >= 32 {
20187 _mm512_setzero_si512()
20188 } else {
20189 transmute(src:simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8)))
20190 }
20191 }
20192}
20193
20194/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20195///
20196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
20197#[inline]
20198#[target_feature(enable = "avx512f")]
20199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20200#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20201#[rustc_legacy_const_generics(3)]
20202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20203pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
20204 src: __m512i,
20205 k: __mmask16,
20206 a: __m512i,
20207) -> __m512i {
20208 unsafe {
20209 static_assert_uimm_bits!(IMM8, 8);
20210 let shf: Simd = if IMM8 >= 32 {
20211 u32x16::ZERO
20212 } else {
20213 simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8))
20214 };
20215 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u32x16()))
20216 }
20217}
20218
20219/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
20222#[inline]
20223#[target_feature(enable = "avx512f")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20228pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20229 unsafe {
20230 static_assert_uimm_bits!(IMM8, 8);
20231 if IMM8 >= 32 {
20232 _mm512_setzero_si512()
20233 } else {
20234 let shf: Simd = simd_shr(lhs:a.as_u32x16(), rhs:u32x16::splat(IMM8));
20235 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u32x16::ZERO))
20236 }
20237 }
20238}
20239
20240/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20241///
20242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
20243#[inline]
20244#[target_feature(enable = "avx512f,avx512vl")]
20245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20246#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20247#[rustc_legacy_const_generics(3)]
20248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20249pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
20250 src: __m256i,
20251 k: __mmask8,
20252 a: __m256i,
20253) -> __m256i {
20254 unsafe {
20255 static_assert_uimm_bits!(IMM8, 8);
20256 let r: Simd = if IMM8 >= 32 {
20257 u32x8::ZERO
20258 } else {
20259 simd_shr(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8))
20260 };
20261 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x8()))
20262 }
20263}
20264
20265/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20266///
20267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
20268#[inline]
20269#[target_feature(enable = "avx512f,avx512vl")]
20270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20271#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20272#[rustc_legacy_const_generics(2)]
20273#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20274pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20275 unsafe {
20276 static_assert_uimm_bits!(IMM8, 8);
20277 if IMM8 >= 32 {
20278 _mm256_setzero_si256()
20279 } else {
20280 let r: Simd = simd_shr(lhs:a.as_u32x8(), rhs:u32x8::splat(IMM8));
20281 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x8::ZERO))
20282 }
20283 }
20284}
20285
20286/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20287///
20288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
20289#[inline]
20290#[target_feature(enable = "avx512f,avx512vl")]
20291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20292#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20293#[rustc_legacy_const_generics(3)]
20294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20295pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
20296 src: __m128i,
20297 k: __mmask8,
20298 a: __m128i,
20299) -> __m128i {
20300 unsafe {
20301 static_assert_uimm_bits!(IMM8, 8);
20302 let r: Simd = if IMM8 >= 32 {
20303 u32x4::ZERO
20304 } else {
20305 simd_shr(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8))
20306 };
20307 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u32x4()))
20308 }
20309}
20310
20311/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20312///
20313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
20314#[inline]
20315#[target_feature(enable = "avx512f,avx512vl")]
20316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20317#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
20318#[rustc_legacy_const_generics(2)]
20319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20320pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20321 unsafe {
20322 static_assert_uimm_bits!(IMM8, 8);
20323 if IMM8 >= 32 {
20324 _mm_setzero_si128()
20325 } else {
20326 let r: Simd = simd_shr(lhs:a.as_u32x4(), rhs:u32x4::splat(IMM8));
20327 transmute(src:simd_select_bitmask(m:k, yes:r, no:u32x4::ZERO))
20328 }
20329 }
20330}
20331
20332/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20333///
20334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
20335#[inline]
20336#[target_feature(enable = "avx512f")]
20337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20338#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20339#[rustc_legacy_const_generics(1)]
20340#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20341pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20342 unsafe {
20343 static_assert_uimm_bits!(IMM8, 8);
20344 if IMM8 >= 64 {
20345 _mm512_setzero_si512()
20346 } else {
20347 transmute(src:simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64)))
20348 }
20349 }
20350}
20351
20352/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20353///
20354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
20355#[inline]
20356#[target_feature(enable = "avx512f")]
20357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20358#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20359#[rustc_legacy_const_generics(3)]
20360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20361pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
20362 src: __m512i,
20363 k: __mmask8,
20364 a: __m512i,
20365) -> __m512i {
20366 unsafe {
20367 static_assert_uimm_bits!(IMM8, 8);
20368 let shf: Simd = if IMM8 >= 64 {
20369 u64x8::ZERO
20370 } else {
20371 simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64))
20372 };
20373 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u64x8()))
20374 }
20375}
20376
20377/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20378///
20379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20380#[inline]
20381#[target_feature(enable = "avx512f")]
20382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20383#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20384#[rustc_legacy_const_generics(2)]
20385#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20386pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20387 unsafe {
20388 static_assert_uimm_bits!(IMM8, 8);
20389 if IMM8 >= 64 {
20390 _mm512_setzero_si512()
20391 } else {
20392 let shf: Simd = simd_shl(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64));
20393 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u64x8::ZERO))
20394 }
20395 }
20396}
20397
20398/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20399///
20400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20401#[inline]
20402#[target_feature(enable = "avx512f,avx512vl")]
20403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20404#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20405#[rustc_legacy_const_generics(3)]
20406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20407pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
20408 src: __m256i,
20409 k: __mmask8,
20410 a: __m256i,
20411) -> __m256i {
20412 unsafe {
20413 static_assert_uimm_bits!(IMM8, 8);
20414 let r: Simd = if IMM8 >= 64 {
20415 u64x4::ZERO
20416 } else {
20417 simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64))
20418 };
20419 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x4()))
20420 }
20421}
20422
20423/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20424///
20425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20426#[inline]
20427#[target_feature(enable = "avx512f,avx512vl")]
20428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20429#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20430#[rustc_legacy_const_generics(2)]
20431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20432pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20433 unsafe {
20434 static_assert_uimm_bits!(IMM8, 8);
20435 if IMM8 >= 64 {
20436 _mm256_setzero_si256()
20437 } else {
20438 let r: Simd = simd_shl(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64));
20439 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x4::ZERO))
20440 }
20441 }
20442}
20443
20444/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20445///
20446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20447#[inline]
20448#[target_feature(enable = "avx512f,avx512vl")]
20449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20450#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20451#[rustc_legacy_const_generics(3)]
20452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20453pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
20454 src: __m128i,
20455 k: __mmask8,
20456 a: __m128i,
20457) -> __m128i {
20458 unsafe {
20459 static_assert_uimm_bits!(IMM8, 8);
20460 let r: Simd = if IMM8 >= 64 {
20461 u64x2::ZERO
20462 } else {
20463 simd_shl(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64))
20464 };
20465 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x2()))
20466 }
20467}
20468
20469/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20470///
20471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20472#[inline]
20473#[target_feature(enable = "avx512f,avx512vl")]
20474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20475#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20476#[rustc_legacy_const_generics(2)]
20477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20478pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20479 unsafe {
20480 static_assert_uimm_bits!(IMM8, 8);
20481 if IMM8 >= 64 {
20482 _mm_setzero_si128()
20483 } else {
20484 let r: Simd = simd_shl(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64));
20485 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x2::ZERO))
20486 }
20487 }
20488}
20489
20490/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20491///
20492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20493#[inline]
20494#[target_feature(enable = "avx512f")]
20495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20496#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20497#[rustc_legacy_const_generics(1)]
20498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20499pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20500 unsafe {
20501 static_assert_uimm_bits!(IMM8, 8);
20502 if IMM8 >= 64 {
20503 _mm512_setzero_si512()
20504 } else {
20505 transmute(src:simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64)))
20506 }
20507 }
20508}
20509
20510/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20513#[inline]
20514#[target_feature(enable = "avx512f")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20517#[rustc_legacy_const_generics(3)]
20518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20519pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
20520 src: __m512i,
20521 k: __mmask8,
20522 a: __m512i,
20523) -> __m512i {
20524 unsafe {
20525 static_assert_uimm_bits!(IMM8, 8);
20526 let shf: Simd = if IMM8 >= 64 {
20527 u64x8::ZERO
20528 } else {
20529 simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64))
20530 };
20531 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_u64x8()))
20532 }
20533}
20534
20535/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20542#[rustc_legacy_const_generics(2)]
20543#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20544pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20545 unsafe {
20546 static_assert_uimm_bits!(IMM8, 8);
20547 if IMM8 >= 64 {
20548 _mm512_setzero_si512()
20549 } else {
20550 let shf: Simd = simd_shr(lhs:a.as_u64x8(), rhs:u64x8::splat(IMM8 as u64));
20551 transmute(src:simd_select_bitmask(m:k, yes:shf, no:u64x8::ZERO))
20552 }
20553 }
20554}
20555
20556/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20559#[inline]
20560#[target_feature(enable = "avx512f,avx512vl")]
20561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20562#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20563#[rustc_legacy_const_generics(3)]
20564#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20565pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
20566 src: __m256i,
20567 k: __mmask8,
20568 a: __m256i,
20569) -> __m256i {
20570 unsafe {
20571 static_assert_uimm_bits!(IMM8, 8);
20572 let r: Simd = if IMM8 >= 64 {
20573 u64x4::ZERO
20574 } else {
20575 simd_shr(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64))
20576 };
20577 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x4()))
20578 }
20579}
20580
20581/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20582///
20583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20584#[inline]
20585#[target_feature(enable = "avx512f,avx512vl")]
20586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20587#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20588#[rustc_legacy_const_generics(2)]
20589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20590pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20591 unsafe {
20592 static_assert_uimm_bits!(IMM8, 8);
20593 if IMM8 >= 64 {
20594 _mm256_setzero_si256()
20595 } else {
20596 let r: Simd = simd_shr(lhs:a.as_u64x4(), rhs:u64x4::splat(IMM8 as u64));
20597 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x4::ZERO))
20598 }
20599 }
20600}
20601
20602/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20603///
20604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20605#[inline]
20606#[target_feature(enable = "avx512f,avx512vl")]
20607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20608#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20609#[rustc_legacy_const_generics(3)]
20610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20611pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
20612 src: __m128i,
20613 k: __mmask8,
20614 a: __m128i,
20615) -> __m128i {
20616 unsafe {
20617 static_assert_uimm_bits!(IMM8, 8);
20618 let r: Simd = if IMM8 >= 64 {
20619 u64x2::ZERO
20620 } else {
20621 simd_shr(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64))
20622 };
20623 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_u64x2()))
20624 }
20625}
20626
20627/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20628///
20629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20630#[inline]
20631#[target_feature(enable = "avx512f,avx512vl")]
20632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20633#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20634#[rustc_legacy_const_generics(2)]
20635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
20636pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20637 unsafe {
20638 static_assert_uimm_bits!(IMM8, 8);
20639 if IMM8 >= 64 {
20640 _mm_setzero_si128()
20641 } else {
20642 let r: Simd = simd_shr(lhs:a.as_u64x2(), rhs:u64x2::splat(IMM8 as u64));
20643 transmute(src:simd_select_bitmask(m:k, yes:r, no:u64x2::ZERO))
20644 }
20645 }
20646}
20647
20648/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20649///
20650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20651#[inline]
20652#[target_feature(enable = "avx512f")]
20653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20654#[cfg_attr(test, assert_instr(vpslld))]
20655pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20656 unsafe { transmute(src:vpslld(a.as_i32x16(), count.as_i32x4())) }
20657}
20658
20659/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20660///
20661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20662#[inline]
20663#[target_feature(enable = "avx512f")]
20664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20665#[cfg_attr(test, assert_instr(vpslld))]
20666pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20667 unsafe {
20668 let shf: Simd = _mm512_sll_epi32(a, count).as_i32x16();
20669 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20670 }
20671}
20672
20673/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20674///
20675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20676#[inline]
20677#[target_feature(enable = "avx512f")]
20678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20679#[cfg_attr(test, assert_instr(vpslld))]
20680pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20681 unsafe {
20682 let shf: Simd = _mm512_sll_epi32(a, count).as_i32x16();
20683 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
20684 }
20685}
20686
20687/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20688///
20689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20690#[inline]
20691#[target_feature(enable = "avx512f,avx512vl")]
20692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20693#[cfg_attr(test, assert_instr(vpslld))]
20694pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20695 unsafe {
20696 let shf: Simd = _mm256_sll_epi32(a, count).as_i32x8();
20697 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20698 }
20699}
20700
20701/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20702///
20703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20704#[inline]
20705#[target_feature(enable = "avx512f,avx512vl")]
20706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20707#[cfg_attr(test, assert_instr(vpslld))]
20708pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20709 unsafe {
20710 let shf: Simd = _mm256_sll_epi32(a, count).as_i32x8();
20711 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
20712 }
20713}
20714
20715/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20716///
20717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20718#[inline]
20719#[target_feature(enable = "avx512f,avx512vl")]
20720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20721#[cfg_attr(test, assert_instr(vpslld))]
20722pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20723 unsafe {
20724 let shf: Simd = _mm_sll_epi32(a, count).as_i32x4();
20725 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20726 }
20727}
20728
20729/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20735#[cfg_attr(test, assert_instr(vpslld))]
20736pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737 unsafe {
20738 let shf: Simd = _mm_sll_epi32(a, count).as_i32x4();
20739 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
20740 }
20741}
20742
20743/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20746#[inline]
20747#[target_feature(enable = "avx512f")]
20748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20749#[cfg_attr(test, assert_instr(vpsrld))]
20750pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20751 unsafe { transmute(src:vpsrld(a.as_i32x16(), count.as_i32x4())) }
20752}
20753
20754/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20755///
20756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20757#[inline]
20758#[target_feature(enable = "avx512f")]
20759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20760#[cfg_attr(test, assert_instr(vpsrld))]
20761pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20762 unsafe {
20763 let shf: Simd = _mm512_srl_epi32(a, count).as_i32x16();
20764 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
20765 }
20766}
20767
20768/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20769///
20770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20771#[inline]
20772#[target_feature(enable = "avx512f")]
20773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20774#[cfg_attr(test, assert_instr(vpsrld))]
20775pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20776 unsafe {
20777 let shf: Simd = _mm512_srl_epi32(a, count).as_i32x16();
20778 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
20779 }
20780}
20781
20782/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20783///
20784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20785#[inline]
20786#[target_feature(enable = "avx512f,avx512vl")]
20787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20788#[cfg_attr(test, assert_instr(vpsrld))]
20789pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20790 unsafe {
20791 let shf: Simd = _mm256_srl_epi32(a, count).as_i32x8();
20792 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
20793 }
20794}
20795
20796/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20797///
20798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20799#[inline]
20800#[target_feature(enable = "avx512f,avx512vl")]
20801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20802#[cfg_attr(test, assert_instr(vpsrld))]
20803pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20804 unsafe {
20805 let shf: Simd = _mm256_srl_epi32(a, count).as_i32x8();
20806 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
20807 }
20808}
20809
20810/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20811///
20812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20813#[inline]
20814#[target_feature(enable = "avx512f,avx512vl")]
20815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20816#[cfg_attr(test, assert_instr(vpsrld))]
20817pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20818 unsafe {
20819 let shf: Simd = _mm_srl_epi32(a, count).as_i32x4();
20820 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
20821 }
20822}
20823
20824/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20825///
20826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20827#[inline]
20828#[target_feature(enable = "avx512f,avx512vl")]
20829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20830#[cfg_attr(test, assert_instr(vpsrld))]
20831pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20832 unsafe {
20833 let shf: Simd = _mm_srl_epi32(a, count).as_i32x4();
20834 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
20835 }
20836}
20837
20838/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20839///
20840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20841#[inline]
20842#[target_feature(enable = "avx512f")]
20843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20844#[cfg_attr(test, assert_instr(vpsllq))]
20845pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20846 unsafe { transmute(src:vpsllq(a.as_i64x8(), count.as_i64x2())) }
20847}
20848
20849/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20852#[inline]
20853#[target_feature(enable = "avx512f")]
20854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20855#[cfg_attr(test, assert_instr(vpsllq))]
20856pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20857 unsafe {
20858 let shf: Simd = _mm512_sll_epi64(a, count).as_i64x8();
20859 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
20860 }
20861}
20862
20863/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20864///
20865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20866#[inline]
20867#[target_feature(enable = "avx512f")]
20868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20869#[cfg_attr(test, assert_instr(vpsllq))]
20870pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20871 unsafe {
20872 let shf: Simd = _mm512_sll_epi64(a, count).as_i64x8();
20873 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
20874 }
20875}
20876
20877/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20878///
20879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20880#[inline]
20881#[target_feature(enable = "avx512f,avx512vl")]
20882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20883#[cfg_attr(test, assert_instr(vpsllq))]
20884pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20885 unsafe {
20886 let shf: Simd = _mm256_sll_epi64(a, count).as_i64x4();
20887 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
20888 }
20889}
20890
20891/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20892///
20893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20894#[inline]
20895#[target_feature(enable = "avx512f,avx512vl")]
20896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20897#[cfg_attr(test, assert_instr(vpsllq))]
20898pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20899 unsafe {
20900 let shf: Simd = _mm256_sll_epi64(a, count).as_i64x4();
20901 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
20902 }
20903}
20904
20905/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20906///
20907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20908#[inline]
20909#[target_feature(enable = "avx512f,avx512vl")]
20910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20911#[cfg_attr(test, assert_instr(vpsllq))]
20912pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20913 unsafe {
20914 let shf: Simd = _mm_sll_epi64(a, count).as_i64x2();
20915 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
20916 }
20917}
20918
20919/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20920///
20921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20922#[inline]
20923#[target_feature(enable = "avx512f,avx512vl")]
20924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20925#[cfg_attr(test, assert_instr(vpsllq))]
20926pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20927 unsafe {
20928 let shf: Simd = _mm_sll_epi64(a, count).as_i64x2();
20929 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
20930 }
20931}
20932
20933/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20934///
20935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
20936#[inline]
20937#[target_feature(enable = "avx512f")]
20938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20939#[cfg_attr(test, assert_instr(vpsrlq))]
20940pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20941 unsafe { transmute(src:vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20942}
20943
20944/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20945///
20946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20947#[inline]
20948#[target_feature(enable = "avx512f")]
20949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20950#[cfg_attr(test, assert_instr(vpsrlq))]
20951pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20952 unsafe {
20953 let shf: Simd = _mm512_srl_epi64(a, count).as_i64x8();
20954 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
20955 }
20956}
20957
20958/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20961#[inline]
20962#[target_feature(enable = "avx512f")]
20963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20964#[cfg_attr(test, assert_instr(vpsrlq))]
20965pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20966 unsafe {
20967 let shf: Simd = _mm512_srl_epi64(a, count).as_i64x8();
20968 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
20969 }
20970}
20971
20972/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20973///
20974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20975#[inline]
20976#[target_feature(enable = "avx512f,avx512vl")]
20977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20978#[cfg_attr(test, assert_instr(vpsrlq))]
20979pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20980 unsafe {
20981 let shf: Simd = _mm256_srl_epi64(a, count).as_i64x4();
20982 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
20983 }
20984}
20985
20986/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20987///
20988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20989#[inline]
20990#[target_feature(enable = "avx512f,avx512vl")]
20991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20992#[cfg_attr(test, assert_instr(vpsrlq))]
20993pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20994 unsafe {
20995 let shf: Simd = _mm256_srl_epi64(a, count).as_i64x4();
20996 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
20997 }
20998}
20999
21000/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsrlq))]
21007pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21008 unsafe {
21009 let shf: Simd = _mm_srl_epi64(a, count).as_i64x2();
21010 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21011 }
21012}
21013
21014/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21015///
21016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
21017#[inline]
21018#[target_feature(enable = "avx512f,avx512vl")]
21019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21020#[cfg_attr(test, assert_instr(vpsrlq))]
21021pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21022 unsafe {
21023 let shf: Simd = _mm_srl_epi64(a, count).as_i64x2();
21024 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21025 }
21026}
21027
21028/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21029///
21030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
21031#[inline]
21032#[target_feature(enable = "avx512f")]
21033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21034#[cfg_attr(test, assert_instr(vpsrad))]
21035pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
21036 unsafe { transmute(src:vpsrad(a.as_i32x16(), count.as_i32x4())) }
21037}
21038
21039/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21040///
21041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
21042#[inline]
21043#[target_feature(enable = "avx512f")]
21044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21045#[cfg_attr(test, assert_instr(vpsrad))]
21046pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21047 unsafe {
21048 let shf: Simd = _mm512_sra_epi32(a, count).as_i32x16();
21049 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
21050 }
21051}
21052
21053/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21054///
21055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
21056#[inline]
21057#[target_feature(enable = "avx512f")]
21058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21059#[cfg_attr(test, assert_instr(vpsrad))]
21060pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
21061 unsafe {
21062 let shf: Simd = _mm512_sra_epi32(a, count).as_i32x16();
21063 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
21064 }
21065}
21066
21067/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21068///
21069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
21070#[inline]
21071#[target_feature(enable = "avx512f,avx512vl")]
21072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21073#[cfg_attr(test, assert_instr(vpsrad))]
21074pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21075 unsafe {
21076 let shf: Simd = _mm256_sra_epi32(a, count).as_i32x8();
21077 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
21078 }
21079}
21080
21081/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21082///
21083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
21084#[inline]
21085#[target_feature(enable = "avx512f,avx512vl")]
21086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21087#[cfg_attr(test, assert_instr(vpsrad))]
21088pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21089 unsafe {
21090 let shf: Simd = _mm256_sra_epi32(a, count).as_i32x8();
21091 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
21092 }
21093}
21094
21095/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21096///
21097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
21098#[inline]
21099#[target_feature(enable = "avx512f,avx512vl")]
21100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21101#[cfg_attr(test, assert_instr(vpsrad))]
21102pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21103 unsafe {
21104 let shf: Simd = _mm_sra_epi32(a, count).as_i32x4();
21105 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
21106 }
21107}
21108
21109/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21110///
21111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
21112#[inline]
21113#[target_feature(enable = "avx512f,avx512vl")]
21114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21115#[cfg_attr(test, assert_instr(vpsrad))]
21116pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21117 unsafe {
21118 let shf: Simd = _mm_sra_epi32(a, count).as_i32x4();
21119 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
21120 }
21121}
21122
21123/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21124///
21125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
21126#[inline]
21127#[target_feature(enable = "avx512f")]
21128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21129#[cfg_attr(test, assert_instr(vpsraq))]
21130pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
21131 unsafe { transmute(src:vpsraq(a.as_i64x8(), count.as_i64x2())) }
21132}
21133
21134/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21135///
21136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
21137#[inline]
21138#[target_feature(enable = "avx512f")]
21139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21140#[cfg_attr(test, assert_instr(vpsraq))]
21141pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21142 unsafe {
21143 let shf: Simd = _mm512_sra_epi64(a, count).as_i64x8();
21144 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21145 }
21146}
21147
21148/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21149///
21150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
21151#[inline]
21152#[target_feature(enable = "avx512f")]
21153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21154#[cfg_attr(test, assert_instr(vpsraq))]
21155pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
21156 unsafe {
21157 let shf: Simd = _mm512_sra_epi64(a, count).as_i64x8();
21158 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21159 }
21160}
21161
21162/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21163///
21164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
21165#[inline]
21166#[target_feature(enable = "avx512f,avx512vl")]
21167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21168#[cfg_attr(test, assert_instr(vpsraq))]
21169pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
21170 unsafe { transmute(src:vpsraq256(a.as_i64x4(), count.as_i64x2())) }
21171}
21172
21173/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21174///
21175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
21176#[inline]
21177#[target_feature(enable = "avx512f,avx512vl")]
21178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21179#[cfg_attr(test, assert_instr(vpsraq))]
21180pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21181 unsafe {
21182 let shf: Simd = _mm256_sra_epi64(a, count).as_i64x4();
21183 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21184 }
21185}
21186
21187/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21188///
21189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
21190#[inline]
21191#[target_feature(enable = "avx512f,avx512vl")]
21192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21193#[cfg_attr(test, assert_instr(vpsraq))]
21194pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
21195 unsafe {
21196 let shf: Simd = _mm256_sra_epi64(a, count).as_i64x4();
21197 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21198 }
21199}
21200
21201/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
21202///
21203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
21204#[inline]
21205#[target_feature(enable = "avx512f,avx512vl")]
21206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21207#[cfg_attr(test, assert_instr(vpsraq))]
21208pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
21209 unsafe { transmute(src:vpsraq128(a.as_i64x2(), count.as_i64x2())) }
21210}
21211
21212/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21213///
21214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
21215#[inline]
21216#[target_feature(enable = "avx512f,avx512vl")]
21217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21218#[cfg_attr(test, assert_instr(vpsraq))]
21219pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21220 unsafe {
21221 let shf: Simd = _mm_sra_epi64(a, count).as_i64x2();
21222 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21223 }
21224}
21225
21226/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21227///
21228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
21229#[inline]
21230#[target_feature(enable = "avx512f,avx512vl")]
21231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21232#[cfg_attr(test, assert_instr(vpsraq))]
21233pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21234 unsafe {
21235 let shf: Simd = _mm_sra_epi64(a, count).as_i64x2();
21236 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21237 }
21238}
21239
21240/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21241///
21242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
21243#[inline]
21244#[target_feature(enable = "avx512f")]
21245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21246#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21247#[rustc_legacy_const_generics(1)]
21248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21249pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
21250 unsafe {
21251 static_assert_uimm_bits!(IMM8, 8);
21252 transmute(src:simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32)))
21253 }
21254}
21255
21256/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
21259#[inline]
21260#[target_feature(enable = "avx512f")]
21261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21262#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21263#[rustc_legacy_const_generics(3)]
21264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21265pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
21266 src: __m512i,
21267 k: __mmask16,
21268 a: __m512i,
21269) -> __m512i {
21270 unsafe {
21271 static_assert_uimm_bits!(IMM8, 8);
21272 let r: Simd = simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32));
21273 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
21274 }
21275}
21276
21277/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
21280#[inline]
21281#[target_feature(enable = "avx512f")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21284#[rustc_legacy_const_generics(2)]
21285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21286pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
21287 unsafe {
21288 static_assert_uimm_bits!(IMM8, 8);
21289 let r: Simd = simd_shr(lhs:a.as_i32x16(), rhs:i32x16::splat(IMM8.min(31) as i32));
21290 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
21291 }
21292}
21293
21294/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21295///
21296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
21297#[inline]
21298#[target_feature(enable = "avx512f,avx512vl")]
21299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21300#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21301#[rustc_legacy_const_generics(3)]
21302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21303pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
21304 src: __m256i,
21305 k: __mmask8,
21306 a: __m256i,
21307) -> __m256i {
21308 unsafe {
21309 let r: Simd = simd_shr(lhs:a.as_i32x8(), rhs:i32x8::splat(IMM8.min(31) as i32));
21310 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
21311 }
21312}
21313
21314/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21315///
21316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
21317#[inline]
21318#[target_feature(enable = "avx512f,avx512vl")]
21319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21320#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21321#[rustc_legacy_const_generics(2)]
21322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21323pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21324 unsafe {
21325 let r: Simd = simd_shr(lhs:a.as_i32x8(), rhs:i32x8::splat(IMM8.min(31) as i32));
21326 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
21327 }
21328}
21329
21330/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21337#[rustc_legacy_const_generics(3)]
21338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21339pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
21340 src: __m128i,
21341 k: __mmask8,
21342 a: __m128i,
21343) -> __m128i {
21344 unsafe {
21345 let r: Simd = simd_shr(lhs:a.as_i32x4(), rhs:i32x4::splat(IMM8.min(31) as i32));
21346 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
21347 }
21348}
21349
21350/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21351///
21352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
21353#[inline]
21354#[target_feature(enable = "avx512f,avx512vl")]
21355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21356#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
21357#[rustc_legacy_const_generics(2)]
21358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21359pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21360 unsafe {
21361 let r: Simd = simd_shr(lhs:a.as_i32x4(), rhs:i32x4::splat(IMM8.min(31) as i32));
21362 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
21363 }
21364}
21365
21366/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21367///
21368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
21369#[inline]
21370#[target_feature(enable = "avx512f")]
21371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21372#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21373#[rustc_legacy_const_generics(1)]
21374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21375pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
21376 unsafe {
21377 static_assert_uimm_bits!(IMM8, 8);
21378 transmute(src:simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64)))
21379 }
21380}
21381
21382/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21383///
21384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
21385#[inline]
21386#[target_feature(enable = "avx512f")]
21387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21388#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21389#[rustc_legacy_const_generics(3)]
21390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21391pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
21392 src: __m512i,
21393 k: __mmask8,
21394 a: __m512i,
21395) -> __m512i {
21396 unsafe {
21397 static_assert_uimm_bits!(IMM8, 8);
21398 let shf: Simd = simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64));
21399 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21400 }
21401}
21402
21403/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21404///
21405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
21406#[inline]
21407#[target_feature(enable = "avx512f")]
21408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21409#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21410#[rustc_legacy_const_generics(2)]
21411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21412pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
21413 unsafe {
21414 static_assert_uimm_bits!(IMM8, 8);
21415 let shf: Simd = simd_shr(lhs:a.as_i64x8(), rhs:i64x8::splat(IMM8.min(63) as i64));
21416 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21417 }
21418}
21419
21420/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21421///
21422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
21423#[inline]
21424#[target_feature(enable = "avx512f,avx512vl")]
21425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21426#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21427#[rustc_legacy_const_generics(1)]
21428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21429pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
21430 unsafe {
21431 static_assert_uimm_bits!(IMM8, 8);
21432 transmute(src:simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64)))
21433 }
21434}
21435
21436/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21437///
21438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21439#[inline]
21440#[target_feature(enable = "avx512f,avx512vl")]
21441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21442#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21443#[rustc_legacy_const_generics(3)]
21444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21445pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
21446 src: __m256i,
21447 k: __mmask8,
21448 a: __m256i,
21449) -> __m256i {
21450 unsafe {
21451 static_assert_uimm_bits!(IMM8, 8);
21452 let shf: Simd = simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64));
21453 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21454 }
21455}
21456
21457/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21458///
21459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21460#[inline]
21461#[target_feature(enable = "avx512f,avx512vl")]
21462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21463#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21464#[rustc_legacy_const_generics(2)]
21465#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21466pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21467 unsafe {
21468 static_assert_uimm_bits!(IMM8, 8);
21469 let shf: Simd = simd_shr(lhs:a.as_i64x4(), rhs:i64x4::splat(IMM8.min(63) as i64));
21470 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21471 }
21472}
21473
21474/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21475///
21476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21477#[inline]
21478#[target_feature(enable = "avx512f,avx512vl")]
21479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21480#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21481#[rustc_legacy_const_generics(1)]
21482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21483pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21484 unsafe {
21485 static_assert_uimm_bits!(IMM8, 8);
21486 transmute(src:simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64)))
21487 }
21488}
21489
21490/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21496#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21497#[rustc_legacy_const_generics(3)]
21498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21499pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
21500 src: __m128i,
21501 k: __mmask8,
21502 a: __m128i,
21503) -> __m128i {
21504 unsafe {
21505 static_assert_uimm_bits!(IMM8, 8);
21506 let shf: Simd = simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64));
21507 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21508 }
21509}
21510
21511/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21518#[rustc_legacy_const_generics(2)]
21519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21520pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21521 unsafe {
21522 static_assert_uimm_bits!(IMM8, 8);
21523 let shf: Simd = simd_shr(lhs:a.as_i64x2(), rhs:i64x2::splat(IMM8.min(63) as i64));
21524 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21525 }
21526}
21527
21528/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21529///
21530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
21531#[inline]
21532#[target_feature(enable = "avx512f")]
21533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21534#[cfg_attr(test, assert_instr(vpsravd))]
21535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21536pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21537 unsafe {
21538 let count: Simd = count.as_u32x16();
21539 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
21540 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i32x16::splat(31));
21541 simd_shr(lhs:a.as_i32x16(), rhs:count).as_m512i()
21542 }
21543}
21544
21545/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21546///
21547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21548#[inline]
21549#[target_feature(enable = "avx512f")]
21550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21551#[cfg_attr(test, assert_instr(vpsravd))]
21552#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21553pub const fn _mm512_mask_srav_epi32(
21554 src: __m512i,
21555 k: __mmask16,
21556 a: __m512i,
21557 count: __m512i,
21558) -> __m512i {
21559 unsafe {
21560 let shf: Simd = _mm512_srav_epi32(a, count).as_i32x16();
21561 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
21562 }
21563}
21564
21565/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21566///
21567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21568#[inline]
21569#[target_feature(enable = "avx512f")]
21570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21571#[cfg_attr(test, assert_instr(vpsravd))]
21572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21573pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21574 unsafe {
21575 let shf: Simd = _mm512_srav_epi32(a, count).as_i32x16();
21576 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
21577 }
21578}
21579
21580/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21581///
21582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21583#[inline]
21584#[target_feature(enable = "avx512f,avx512vl")]
21585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21586#[cfg_attr(test, assert_instr(vpsravd))]
21587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21588pub const fn _mm256_mask_srav_epi32(
21589 src: __m256i,
21590 k: __mmask8,
21591 a: __m256i,
21592 count: __m256i,
21593) -> __m256i {
21594 unsafe {
21595 let shf: Simd = _mm256_srav_epi32(a, count).as_i32x8();
21596 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
21597 }
21598}
21599
21600/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21601///
21602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21603#[inline]
21604#[target_feature(enable = "avx512f,avx512vl")]
21605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21606#[cfg_attr(test, assert_instr(vpsravd))]
21607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21608pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21609 unsafe {
21610 let shf: Simd = _mm256_srav_epi32(a, count).as_i32x8();
21611 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
21612 }
21613}
21614
21615/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21616///
21617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21618#[inline]
21619#[target_feature(enable = "avx512f,avx512vl")]
21620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21621#[cfg_attr(test, assert_instr(vpsravd))]
21622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21623pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21624 unsafe {
21625 let shf: Simd = _mm_srav_epi32(a, count).as_i32x4();
21626 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
21627 }
21628}
21629
21630/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21631///
21632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21633#[inline]
21634#[target_feature(enable = "avx512f,avx512vl")]
21635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21636#[cfg_attr(test, assert_instr(vpsravd))]
21637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21638pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21639 unsafe {
21640 let shf: Simd = _mm_srav_epi32(a, count).as_i32x4();
21641 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
21642 }
21643}
21644
21645/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21646///
21647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21648#[inline]
21649#[target_feature(enable = "avx512f")]
21650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21651#[cfg_attr(test, assert_instr(vpsravq))]
21652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21653pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21654 unsafe {
21655 let count: Simd = count.as_u64x8();
21656 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
21657 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x8::splat(63));
21658 simd_shr(lhs:a.as_i64x8(), rhs:count).as_m512i()
21659 }
21660}
21661
21662/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21663///
21664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21665#[inline]
21666#[target_feature(enable = "avx512f")]
21667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21668#[cfg_attr(test, assert_instr(vpsravq))]
21669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21670pub const fn _mm512_mask_srav_epi64(
21671 src: __m512i,
21672 k: __mmask8,
21673 a: __m512i,
21674 count: __m512i,
21675) -> __m512i {
21676 unsafe {
21677 let shf: Simd = _mm512_srav_epi64(a, count).as_i64x8();
21678 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
21679 }
21680}
21681
21682/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21683///
21684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21685#[inline]
21686#[target_feature(enable = "avx512f")]
21687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21688#[cfg_attr(test, assert_instr(vpsravq))]
21689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21690pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21691 unsafe {
21692 let shf: Simd = _mm512_srav_epi64(a, count).as_i64x8();
21693 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
21694 }
21695}
21696
21697/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21698///
21699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21700#[inline]
21701#[target_feature(enable = "avx512f,avx512vl")]
21702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21703#[cfg_attr(test, assert_instr(vpsravq))]
21704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21705pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21706 unsafe {
21707 let count: Simd = count.as_u64x4();
21708 let no_overflow: u64x4 = simd_lt(x:count, y:u64x4::splat(u64::BITS as u64));
21709 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x4::splat(63));
21710 simd_shr(lhs:a.as_i64x4(), rhs:count).as_m256i()
21711 }
21712}
21713
21714/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21715///
21716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21717#[inline]
21718#[target_feature(enable = "avx512f,avx512vl")]
21719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21720#[cfg_attr(test, assert_instr(vpsravq))]
21721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21722pub const fn _mm256_mask_srav_epi64(
21723 src: __m256i,
21724 k: __mmask8,
21725 a: __m256i,
21726 count: __m256i,
21727) -> __m256i {
21728 unsafe {
21729 let shf: Simd = _mm256_srav_epi64(a, count).as_i64x4();
21730 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
21731 }
21732}
21733
21734/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21735///
21736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21737#[inline]
21738#[target_feature(enable = "avx512f,avx512vl")]
21739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21740#[cfg_attr(test, assert_instr(vpsravq))]
21741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21742pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21743 unsafe {
21744 let shf: Simd = _mm256_srav_epi64(a, count).as_i64x4();
21745 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
21746 }
21747}
21748
21749/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21750///
21751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21752#[inline]
21753#[target_feature(enable = "avx512f,avx512vl")]
21754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21755#[cfg_attr(test, assert_instr(vpsravq))]
21756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21757pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21758 unsafe {
21759 let count: Simd = count.as_u64x2();
21760 let no_overflow: u64x2 = simd_lt(x:count, y:u64x2::splat(u64::BITS as u64));
21761 let count: Simd = simd_select(mask:no_overflow, if_true:transmute(count), if_false:i64x2::splat(63));
21762 simd_shr(lhs:a.as_i64x2(), rhs:count).as_m128i()
21763 }
21764}
21765
21766/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21772#[cfg_attr(test, assert_instr(vpsravq))]
21773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21774pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21775 unsafe {
21776 let shf: Simd = _mm_srav_epi64(a, count).as_i64x2();
21777 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
21778 }
21779}
21780
21781/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21782///
21783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21784#[inline]
21785#[target_feature(enable = "avx512f,avx512vl")]
21786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21787#[cfg_attr(test, assert_instr(vpsravq))]
21788#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21789pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21790 unsafe {
21791 let shf: Simd = _mm_srav_epi64(a, count).as_i64x2();
21792 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
21793 }
21794}
21795
21796/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21797///
21798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21799#[inline]
21800#[target_feature(enable = "avx512f")]
21801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21802#[cfg_attr(test, assert_instr(vprolvd))]
21803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21804pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21805 unsafe {
21806 transmute(src:simd_funnel_shl(
21807 a.as_u32x16(),
21808 b:a.as_u32x16(),
21809 shift:simd_and(x:b.as_u32x16(), y:u32x16::splat(31)),
21810 ))
21811 }
21812}
21813
21814/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21815///
21816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21817#[inline]
21818#[target_feature(enable = "avx512f")]
21819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21820#[cfg_attr(test, assert_instr(vprolvd))]
21821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21822pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21823 unsafe {
21824 let rol: Simd = _mm512_rolv_epi32(a, b).as_i32x16();
21825 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x16()))
21826 }
21827}
21828
21829/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21830///
21831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21832#[inline]
21833#[target_feature(enable = "avx512f")]
21834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21835#[cfg_attr(test, assert_instr(vprolvd))]
21836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21837pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21838 unsafe {
21839 let rol: Simd = _mm512_rolv_epi32(a, b).as_i32x16();
21840 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x16::ZERO))
21841 }
21842}
21843
21844/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21845///
21846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21847#[inline]
21848#[target_feature(enable = "avx512f,avx512vl")]
21849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21850#[cfg_attr(test, assert_instr(vprolvd))]
21851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21852pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21853 unsafe {
21854 transmute(src:simd_funnel_shl(
21855 a.as_u32x8(),
21856 b:a.as_u32x8(),
21857 shift:simd_and(x:b.as_u32x8(), y:u32x8::splat(31)),
21858 ))
21859 }
21860}
21861
21862/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21863///
21864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21865#[inline]
21866#[target_feature(enable = "avx512f,avx512vl")]
21867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21868#[cfg_attr(test, assert_instr(vprolvd))]
21869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21870pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21871 unsafe {
21872 let rol: Simd = _mm256_rolv_epi32(a, b).as_i32x8();
21873 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x8()))
21874 }
21875}
21876
21877/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21878///
21879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21880#[inline]
21881#[target_feature(enable = "avx512f,avx512vl")]
21882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21883#[cfg_attr(test, assert_instr(vprolvd))]
21884#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21885pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21886 unsafe {
21887 let rol: Simd = _mm256_rolv_epi32(a, b).as_i32x8();
21888 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x8::ZERO))
21889 }
21890}
21891
21892/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21893///
21894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21895#[inline]
21896#[target_feature(enable = "avx512f,avx512vl")]
21897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21898#[cfg_attr(test, assert_instr(vprolvd))]
21899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21900pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21901 unsafe {
21902 transmute(src:simd_funnel_shl(
21903 a.as_u32x4(),
21904 b:a.as_u32x4(),
21905 shift:simd_and(x:b.as_u32x4(), y:u32x4::splat(31)),
21906 ))
21907 }
21908}
21909
21910/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21911///
21912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21913#[inline]
21914#[target_feature(enable = "avx512f,avx512vl")]
21915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21916#[cfg_attr(test, assert_instr(vprolvd))]
21917#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21918pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21919 unsafe {
21920 let rol: Simd = _mm_rolv_epi32(a, b).as_i32x4();
21921 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i32x4()))
21922 }
21923}
21924
21925/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21926///
21927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21928#[inline]
21929#[target_feature(enable = "avx512f,avx512vl")]
21930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21931#[cfg_attr(test, assert_instr(vprolvd))]
21932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21933pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21934 unsafe {
21935 let rol: Simd = _mm_rolv_epi32(a, b).as_i32x4();
21936 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i32x4::ZERO))
21937 }
21938}
21939
21940/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21941///
21942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21943#[inline]
21944#[target_feature(enable = "avx512f")]
21945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21946#[cfg_attr(test, assert_instr(vprorvd))]
21947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21948pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21949 unsafe {
21950 transmute(src:simd_funnel_shr(
21951 a.as_u32x16(),
21952 b:a.as_u32x16(),
21953 shift:simd_and(x:b.as_u32x16(), y:u32x16::splat(31)),
21954 ))
21955 }
21956}
21957
21958/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21959///
21960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21961#[inline]
21962#[target_feature(enable = "avx512f")]
21963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21964#[cfg_attr(test, assert_instr(vprorvd))]
21965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21966pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21967 unsafe {
21968 let ror: Simd = _mm512_rorv_epi32(a, b).as_i32x16();
21969 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x16()))
21970 }
21971}
21972
21973/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21974///
21975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21976#[inline]
21977#[target_feature(enable = "avx512f")]
21978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21979#[cfg_attr(test, assert_instr(vprorvd))]
21980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21981pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21982 unsafe {
21983 let ror: Simd = _mm512_rorv_epi32(a, b).as_i32x16();
21984 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x16::ZERO))
21985 }
21986}
21987
21988/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vprorvd))]
21995#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21996pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21997 unsafe {
21998 transmute(src:simd_funnel_shr(
21999 a.as_u32x8(),
22000 b:a.as_u32x8(),
22001 shift:simd_and(x:b.as_u32x8(), y:u32x8::splat(31)),
22002 ))
22003 }
22004}
22005
22006/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22007///
22008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
22009#[inline]
22010#[target_feature(enable = "avx512f,avx512vl")]
22011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22012#[cfg_attr(test, assert_instr(vprorvd))]
22013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22014pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22015 unsafe {
22016 let ror: Simd = _mm256_rorv_epi32(a, b).as_i32x8();
22017 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x8()))
22018 }
22019}
22020
22021/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22022///
22023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
22024#[inline]
22025#[target_feature(enable = "avx512f,avx512vl")]
22026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22027#[cfg_attr(test, assert_instr(vprorvd))]
22028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22029pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22030 unsafe {
22031 let ror: Simd = _mm256_rorv_epi32(a, b).as_i32x8();
22032 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x8::ZERO))
22033 }
22034}
22035
22036/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22037///
22038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
22039#[inline]
22040#[target_feature(enable = "avx512f,avx512vl")]
22041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22042#[cfg_attr(test, assert_instr(vprorvd))]
22043#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22044pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
22045 unsafe {
22046 transmute(src:simd_funnel_shr(
22047 a.as_u32x4(),
22048 b:a.as_u32x4(),
22049 shift:simd_and(x:b.as_u32x4(), y:u32x4::splat(31)),
22050 ))
22051 }
22052}
22053
22054/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22055///
22056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
22057#[inline]
22058#[target_feature(enable = "avx512f,avx512vl")]
22059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22060#[cfg_attr(test, assert_instr(vprorvd))]
22061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22062pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22063 unsafe {
22064 let ror: Simd = _mm_rorv_epi32(a, b).as_i32x4();
22065 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i32x4()))
22066 }
22067}
22068
22069/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
22072#[inline]
22073#[target_feature(enable = "avx512f,avx512vl")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vprorvd))]
22076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22077pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22078 unsafe {
22079 let ror: Simd = _mm_rorv_epi32(a, b).as_i32x4();
22080 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i32x4::ZERO))
22081 }
22082}
22083
22084/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22085///
22086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
22087#[inline]
22088#[target_feature(enable = "avx512f")]
22089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22090#[cfg_attr(test, assert_instr(vprolvq))]
22091#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22092pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
22093 unsafe {
22094 transmute(src:simd_funnel_shl(
22095 a.as_u64x8(),
22096 b:a.as_u64x8(),
22097 shift:simd_and(x:b.as_u64x8(), y:u64x8::splat(63)),
22098 ))
22099 }
22100}
22101
22102/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22103///
22104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
22105#[inline]
22106#[target_feature(enable = "avx512f")]
22107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22108#[cfg_attr(test, assert_instr(vprolvq))]
22109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22110pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22111 unsafe {
22112 let rol: Simd = _mm512_rolv_epi64(a, b).as_i64x8();
22113 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x8()))
22114 }
22115}
22116
22117/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22118///
22119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
22120#[inline]
22121#[target_feature(enable = "avx512f")]
22122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22123#[cfg_attr(test, assert_instr(vprolvq))]
22124#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22125pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22126 unsafe {
22127 let rol: Simd = _mm512_rolv_epi64(a, b).as_i64x8();
22128 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x8::ZERO))
22129 }
22130}
22131
22132/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22133///
22134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
22135#[inline]
22136#[target_feature(enable = "avx512f,avx512vl")]
22137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22138#[cfg_attr(test, assert_instr(vprolvq))]
22139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22140pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
22141 unsafe {
22142 transmute(src:simd_funnel_shl(
22143 a.as_u64x4(),
22144 b:a.as_u64x4(),
22145 shift:simd_and(x:b.as_u64x4(), y:u64x4::splat(63)),
22146 ))
22147 }
22148}
22149
22150/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22151///
22152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
22153#[inline]
22154#[target_feature(enable = "avx512f,avx512vl")]
22155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22156#[cfg_attr(test, assert_instr(vprolvq))]
22157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22158pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22159 unsafe {
22160 let rol: Simd = _mm256_rolv_epi64(a, b).as_i64x4();
22161 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x4()))
22162 }
22163}
22164
22165/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22166///
22167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
22168#[inline]
22169#[target_feature(enable = "avx512f,avx512vl")]
22170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22171#[cfg_attr(test, assert_instr(vprolvq))]
22172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22173pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22174 unsafe {
22175 let rol: Simd = _mm256_rolv_epi64(a, b).as_i64x4();
22176 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x4::ZERO))
22177 }
22178}
22179
22180/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22181///
22182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
22183#[inline]
22184#[target_feature(enable = "avx512f,avx512vl")]
22185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22186#[cfg_attr(test, assert_instr(vprolvq))]
22187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22188pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
22189 unsafe {
22190 transmute(src:simd_funnel_shl(
22191 a.as_u64x2(),
22192 b:a.as_u64x2(),
22193 shift:simd_and(x:b.as_u64x2(), y:u64x2::splat(63)),
22194 ))
22195 }
22196}
22197
22198/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22199///
22200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
22201#[inline]
22202#[target_feature(enable = "avx512f,avx512vl")]
22203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22204#[cfg_attr(test, assert_instr(vprolvq))]
22205#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22206pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22207 unsafe {
22208 let rol: Simd = _mm_rolv_epi64(a, b).as_i64x2();
22209 transmute(src:simd_select_bitmask(m:k, yes:rol, no:src.as_i64x2()))
22210 }
22211}
22212
22213/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22214///
22215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
22216#[inline]
22217#[target_feature(enable = "avx512f,avx512vl")]
22218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22219#[cfg_attr(test, assert_instr(vprolvq))]
22220#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22221pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22222 unsafe {
22223 let rol: Simd = _mm_rolv_epi64(a, b).as_i64x2();
22224 transmute(src:simd_select_bitmask(m:k, yes:rol, no:i64x2::ZERO))
22225 }
22226}
22227
22228/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22229///
22230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
22231#[inline]
22232#[target_feature(enable = "avx512f")]
22233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22234#[cfg_attr(test, assert_instr(vprorvq))]
22235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22236pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
22237 unsafe {
22238 transmute(src:simd_funnel_shr(
22239 a.as_u64x8(),
22240 b:a.as_u64x8(),
22241 shift:simd_and(x:b.as_u64x8(), y:u64x8::splat(63)),
22242 ))
22243 }
22244}
22245
22246/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22247///
22248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
22249#[inline]
22250#[target_feature(enable = "avx512f")]
22251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22252#[cfg_attr(test, assert_instr(vprorvq))]
22253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22254pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22255 unsafe {
22256 let ror: Simd = _mm512_rorv_epi64(a, b).as_i64x8();
22257 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x8()))
22258 }
22259}
22260
22261/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22262///
22263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
22264#[inline]
22265#[target_feature(enable = "avx512f")]
22266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22267#[cfg_attr(test, assert_instr(vprorvq))]
22268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22269pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
22270 unsafe {
22271 let ror: Simd = _mm512_rorv_epi64(a, b).as_i64x8();
22272 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x8::ZERO))
22273 }
22274}
22275
22276/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22277///
22278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
22279#[inline]
22280#[target_feature(enable = "avx512f,avx512vl")]
22281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22282#[cfg_attr(test, assert_instr(vprorvq))]
22283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22284pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
22285 unsafe {
22286 transmute(src:simd_funnel_shr(
22287 a.as_u64x4(),
22288 b:a.as_u64x4(),
22289 shift:simd_and(x:b.as_u64x4(), y:u64x4::splat(63)),
22290 ))
22291 }
22292}
22293
22294/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22295///
22296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
22297#[inline]
22298#[target_feature(enable = "avx512f,avx512vl")]
22299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22300#[cfg_attr(test, assert_instr(vprorvq))]
22301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22302pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22303 unsafe {
22304 let ror: Simd = _mm256_rorv_epi64(a, b).as_i64x4();
22305 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x4()))
22306 }
22307}
22308
22309/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22310///
22311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
22312#[inline]
22313#[target_feature(enable = "avx512f,avx512vl")]
22314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22315#[cfg_attr(test, assert_instr(vprorvq))]
22316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22317pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22318 unsafe {
22319 let ror: Simd = _mm256_rorv_epi64(a, b).as_i64x4();
22320 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x4::ZERO))
22321 }
22322}
22323
22324/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22325///
22326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
22327#[inline]
22328#[target_feature(enable = "avx512f,avx512vl")]
22329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22330#[cfg_attr(test, assert_instr(vprorvq))]
22331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22332pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
22333 unsafe {
22334 transmute(src:simd_funnel_shr(
22335 a.as_u64x2(),
22336 b:a.as_u64x2(),
22337 shift:simd_and(x:b.as_u64x2(), y:u64x2::splat(63)),
22338 ))
22339 }
22340}
22341
22342/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22343///
22344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
22345#[inline]
22346#[target_feature(enable = "avx512f,avx512vl")]
22347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22348#[cfg_attr(test, assert_instr(vprorvq))]
22349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22350pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22351 unsafe {
22352 let ror: Simd = _mm_rorv_epi64(a, b).as_i64x2();
22353 transmute(src:simd_select_bitmask(m:k, yes:ror, no:src.as_i64x2()))
22354 }
22355}
22356
22357/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22358///
22359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
22360#[inline]
22361#[target_feature(enable = "avx512f,avx512vl")]
22362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22363#[cfg_attr(test, assert_instr(vprorvq))]
22364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22365pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22366 unsafe {
22367 let ror: Simd = _mm_rorv_epi64(a, b).as_i64x2();
22368 transmute(src:simd_select_bitmask(m:k, yes:ror, no:i64x2::ZERO))
22369 }
22370}
22371
22372/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22373///
22374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
22375#[inline]
22376#[target_feature(enable = "avx512f")]
22377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22378#[cfg_attr(test, assert_instr(vpsllvd))]
22379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22380pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
22381 unsafe {
22382 let count: Simd = count.as_u32x16();
22383 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
22384 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u32x16::ZERO);
22385 simd_select(mask:no_overflow, if_true:simd_shl(a.as_u32x16(), count), if_false:u32x16::ZERO).as_m512i()
22386 }
22387}
22388
22389/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22390///
22391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
22392#[inline]
22393#[target_feature(enable = "avx512f")]
22394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22395#[cfg_attr(test, assert_instr(vpsllvd))]
22396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22397pub const fn _mm512_mask_sllv_epi32(
22398 src: __m512i,
22399 k: __mmask16,
22400 a: __m512i,
22401 count: __m512i,
22402) -> __m512i {
22403 unsafe {
22404 let shf: Simd = _mm512_sllv_epi32(a, count).as_i32x16();
22405 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
22406 }
22407}
22408
22409/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22410///
22411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
22412#[inline]
22413#[target_feature(enable = "avx512f")]
22414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22415#[cfg_attr(test, assert_instr(vpsllvd))]
22416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22417pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
22418 unsafe {
22419 let shf: Simd = _mm512_sllv_epi32(a, count).as_i32x16();
22420 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
22421 }
22422}
22423
22424/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22425///
22426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
22427#[inline]
22428#[target_feature(enable = "avx512f,avx512vl")]
22429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22430#[cfg_attr(test, assert_instr(vpsllvd))]
22431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22432pub const fn _mm256_mask_sllv_epi32(
22433 src: __m256i,
22434 k: __mmask8,
22435 a: __m256i,
22436 count: __m256i,
22437) -> __m256i {
22438 unsafe {
22439 let shf: Simd = _mm256_sllv_epi32(a, count).as_i32x8();
22440 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
22441 }
22442}
22443
22444/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22445///
22446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
22447#[inline]
22448#[target_feature(enable = "avx512f,avx512vl")]
22449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22450#[cfg_attr(test, assert_instr(vpsllvd))]
22451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22452pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22453 unsafe {
22454 let shf: Simd = _mm256_sllv_epi32(a, count).as_i32x8();
22455 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
22456 }
22457}
22458
22459/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22460///
22461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
22462#[inline]
22463#[target_feature(enable = "avx512f,avx512vl")]
22464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22465#[cfg_attr(test, assert_instr(vpsllvd))]
22466#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22467pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22468 unsafe {
22469 let shf: Simd = _mm_sllv_epi32(a, count).as_i32x4();
22470 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
22471 }
22472}
22473
22474/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22475///
22476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
22477#[inline]
22478#[target_feature(enable = "avx512f,avx512vl")]
22479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22480#[cfg_attr(test, assert_instr(vpsllvd))]
22481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22482pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22483 unsafe {
22484 let shf: Simd = _mm_sllv_epi32(a, count).as_i32x4();
22485 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
22486 }
22487}
22488
22489/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22490///
22491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
22492#[inline]
22493#[target_feature(enable = "avx512f")]
22494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22495#[cfg_attr(test, assert_instr(vpsrlvd))]
22496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22497pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
22498 unsafe {
22499 let count: Simd = count.as_u32x16();
22500 let no_overflow: u32x16 = simd_lt(x:count, y:u32x16::splat(u32::BITS));
22501 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u32x16::ZERO);
22502 simd_select(mask:no_overflow, if_true:simd_shr(a.as_u32x16(), count), if_false:u32x16::ZERO).as_m512i()
22503 }
22504}
22505
22506/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22507///
22508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
22509#[inline]
22510#[target_feature(enable = "avx512f")]
22511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22512#[cfg_attr(test, assert_instr(vpsrlvd))]
22513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22514pub const fn _mm512_mask_srlv_epi32(
22515 src: __m512i,
22516 k: __mmask16,
22517 a: __m512i,
22518 count: __m512i,
22519) -> __m512i {
22520 unsafe {
22521 let shf: Simd = _mm512_srlv_epi32(a, count).as_i32x16();
22522 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x16()))
22523 }
22524}
22525
22526/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22527///
22528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
22529#[inline]
22530#[target_feature(enable = "avx512f")]
22531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22532#[cfg_attr(test, assert_instr(vpsrlvd))]
22533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22534pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
22535 unsafe {
22536 let shf: Simd = _mm512_srlv_epi32(a, count).as_i32x16();
22537 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x16::ZERO))
22538 }
22539}
22540
22541/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22542///
22543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
22544#[inline]
22545#[target_feature(enable = "avx512f,avx512vl")]
22546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22547#[cfg_attr(test, assert_instr(vpsrlvd))]
22548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22549pub const fn _mm256_mask_srlv_epi32(
22550 src: __m256i,
22551 k: __mmask8,
22552 a: __m256i,
22553 count: __m256i,
22554) -> __m256i {
22555 unsafe {
22556 let shf: Simd = _mm256_srlv_epi32(a, count).as_i32x8();
22557 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x8()))
22558 }
22559}
22560
22561/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22562///
22563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
22564#[inline]
22565#[target_feature(enable = "avx512f,avx512vl")]
22566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22567#[cfg_attr(test, assert_instr(vpsrlvd))]
22568#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22569pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22570 unsafe {
22571 let shf: Simd = _mm256_srlv_epi32(a, count).as_i32x8();
22572 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x8::ZERO))
22573 }
22574}
22575
22576/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22577///
22578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
22579#[inline]
22580#[target_feature(enable = "avx512f,avx512vl")]
22581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22582#[cfg_attr(test, assert_instr(vpsrlvd))]
22583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22584pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22585 unsafe {
22586 let shf: Simd = _mm_srlv_epi32(a, count).as_i32x4();
22587 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i32x4()))
22588 }
22589}
22590
22591/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22592///
22593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
22594#[inline]
22595#[target_feature(enable = "avx512f,avx512vl")]
22596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22597#[cfg_attr(test, assert_instr(vpsrlvd))]
22598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22599pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22600 unsafe {
22601 let shf: Simd = _mm_srlv_epi32(a, count).as_i32x4();
22602 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i32x4::ZERO))
22603 }
22604}
22605
22606/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22607///
22608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
22609#[inline]
22610#[target_feature(enable = "avx512f")]
22611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22612#[cfg_attr(test, assert_instr(vpsllvq))]
22613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22614pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
22615 unsafe {
22616 let count: Simd = count.as_u64x8();
22617 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
22618 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u64x8::ZERO);
22619 simd_select(mask:no_overflow, if_true:simd_shl(a.as_u64x8(), count), if_false:u64x8::ZERO).as_m512i()
22620 }
22621}
22622
22623/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22624///
22625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
22626#[inline]
22627#[target_feature(enable = "avx512f")]
22628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22629#[cfg_attr(test, assert_instr(vpsllvq))]
22630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22631pub const fn _mm512_mask_sllv_epi64(
22632 src: __m512i,
22633 k: __mmask8,
22634 a: __m512i,
22635 count: __m512i,
22636) -> __m512i {
22637 unsafe {
22638 let shf: Simd = _mm512_sllv_epi64(a, count).as_i64x8();
22639 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
22640 }
22641}
22642
22643/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22644///
22645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
22646#[inline]
22647#[target_feature(enable = "avx512f")]
22648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22649#[cfg_attr(test, assert_instr(vpsllvq))]
22650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22651pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22652 unsafe {
22653 let shf: Simd = _mm512_sllv_epi64(a, count).as_i64x8();
22654 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
22655 }
22656}
22657
22658/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22659///
22660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
22661#[inline]
22662#[target_feature(enable = "avx512f,avx512vl")]
22663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22664#[cfg_attr(test, assert_instr(vpsllvq))]
22665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22666pub const fn _mm256_mask_sllv_epi64(
22667 src: __m256i,
22668 k: __mmask8,
22669 a: __m256i,
22670 count: __m256i,
22671) -> __m256i {
22672 unsafe {
22673 let shf: Simd = _mm256_sllv_epi64(a, count).as_i64x4();
22674 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
22675 }
22676}
22677
22678/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22679///
22680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22681#[inline]
22682#[target_feature(enable = "avx512f,avx512vl")]
22683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22684#[cfg_attr(test, assert_instr(vpsllvq))]
22685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22686pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22687 unsafe {
22688 let shf: Simd = _mm256_sllv_epi64(a, count).as_i64x4();
22689 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
22690 }
22691}
22692
22693/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22694///
22695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22696#[inline]
22697#[target_feature(enable = "avx512f,avx512vl")]
22698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22699#[cfg_attr(test, assert_instr(vpsllvq))]
22700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22701pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22702 unsafe {
22703 let shf: Simd = _mm_sllv_epi64(a, count).as_i64x2();
22704 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
22705 }
22706}
22707
22708/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22709///
22710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22711#[inline]
22712#[target_feature(enable = "avx512f,avx512vl")]
22713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22714#[cfg_attr(test, assert_instr(vpsllvq))]
22715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22716pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22717 unsafe {
22718 let shf: Simd = _mm_sllv_epi64(a, count).as_i64x2();
22719 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
22720 }
22721}
22722
22723/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22724///
22725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22726#[inline]
22727#[target_feature(enable = "avx512f")]
22728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22729#[cfg_attr(test, assert_instr(vpsrlvq))]
22730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22731pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
22732 unsafe {
22733 let count: Simd = count.as_u64x8();
22734 let no_overflow: u64x8 = simd_lt(x:count, y:u64x8::splat(u64::BITS as u64));
22735 let count: Simd = simd_select(mask:no_overflow, if_true:count, if_false:u64x8::ZERO);
22736 simd_select(mask:no_overflow, if_true:simd_shr(a.as_u64x8(), count), if_false:u64x8::ZERO).as_m512i()
22737 }
22738}
22739
22740/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22743#[inline]
22744#[target_feature(enable = "avx512f")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpsrlvq))]
22747#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22748pub const fn _mm512_mask_srlv_epi64(
22749 src: __m512i,
22750 k: __mmask8,
22751 a: __m512i,
22752 count: __m512i,
22753) -> __m512i {
22754 unsafe {
22755 let shf: Simd = _mm512_srlv_epi64(a, count).as_i64x8();
22756 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x8()))
22757 }
22758}
22759
22760/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22761///
22762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22763#[inline]
22764#[target_feature(enable = "avx512f")]
22765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22766#[cfg_attr(test, assert_instr(vpsrlvq))]
22767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22768pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22769 unsafe {
22770 let shf: Simd = _mm512_srlv_epi64(a, count).as_i64x8();
22771 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x8::ZERO))
22772 }
22773}
22774
22775/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22778#[inline]
22779#[target_feature(enable = "avx512f,avx512vl")]
22780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22781#[cfg_attr(test, assert_instr(vpsrlvq))]
22782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22783pub const fn _mm256_mask_srlv_epi64(
22784 src: __m256i,
22785 k: __mmask8,
22786 a: __m256i,
22787 count: __m256i,
22788) -> __m256i {
22789 unsafe {
22790 let shf: Simd = _mm256_srlv_epi64(a, count).as_i64x4();
22791 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x4()))
22792 }
22793}
22794
22795/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22796///
22797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22798#[inline]
22799#[target_feature(enable = "avx512f,avx512vl")]
22800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22801#[cfg_attr(test, assert_instr(vpsrlvq))]
22802#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22803pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22804 unsafe {
22805 let shf: Simd = _mm256_srlv_epi64(a, count).as_i64x4();
22806 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x4::ZERO))
22807 }
22808}
22809
22810/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22811///
22812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22813#[inline]
22814#[target_feature(enable = "avx512f,avx512vl")]
22815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22816#[cfg_attr(test, assert_instr(vpsrlvq))]
22817#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22818pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22819 unsafe {
22820 let shf: Simd = _mm_srlv_epi64(a, count).as_i64x2();
22821 transmute(src:simd_select_bitmask(m:k, yes:shf, no:src.as_i64x2()))
22822 }
22823}
22824
22825/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22826///
22827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22828#[inline]
22829#[target_feature(enable = "avx512f,avx512vl")]
22830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22831#[cfg_attr(test, assert_instr(vpsrlvq))]
22832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22833pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22834 unsafe {
22835 let shf: Simd = _mm_srlv_epi64(a, count).as_i64x2();
22836 transmute(src:simd_select_bitmask(m:k, yes:shf, no:i64x2::ZERO))
22837 }
22838}
22839
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK selects one of the 4 f32 elements inside a
        // 128-bit lane; the +4/+8/+12 offsets replay the same selection for
        // the three upper lanes, so the shuffle never crosses lane boundaries.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}
22876
22877/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22883#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22884#[rustc_legacy_const_generics(3)]
22885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22886pub const fn _mm512_mask_permute_ps<const MASK: i32>(
22887 src: __m512,
22888 k: __mmask16,
22889 a: __m512,
22890) -> __m512 {
22891 unsafe {
22892 static_assert_uimm_bits!(MASK, 8);
22893 let r: __m512 = _mm512_permute_ps::<MASK>(a);
22894 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
22895 }
22896}
22897
22898/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22899///
22900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22901#[inline]
22902#[target_feature(enable = "avx512f")]
22903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22904#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22905#[rustc_legacy_const_generics(2)]
22906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22907pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22908 unsafe {
22909 static_assert_uimm_bits!(MASK, 8);
22910 let r: __m512 = _mm512_permute_ps::<MASK>(a);
22911 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
22912 }
22913}
22914
22915/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22916///
22917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22918#[inline]
22919#[target_feature(enable = "avx512f,avx512vl")]
22920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22921#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22922#[rustc_legacy_const_generics(3)]
22923#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22924pub const fn _mm256_mask_permute_ps<const MASK: i32>(
22925 src: __m256,
22926 k: __mmask8,
22927 a: __m256,
22928) -> __m256 {
22929 unsafe {
22930 let r: __m256 = _mm256_permute_ps::<MASK>(a);
22931 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
22932 }
22933}
22934
22935/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22936///
22937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22938#[inline]
22939#[target_feature(enable = "avx512f,avx512vl")]
22940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22941#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22942#[rustc_legacy_const_generics(2)]
22943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22944pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22945 unsafe {
22946 let r: __m256 = _mm256_permute_ps::<MASK>(a);
22947 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
22948 }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22957#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22958#[rustc_legacy_const_generics(3)]
22959#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22960pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22961 unsafe {
22962 let r: __m128 = _mm_permute_ps::<MASK>(a);
22963 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
22964 }
22965}
22966
22967/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22970#[inline]
22971#[target_feature(enable = "avx512f,avx512vl")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22974#[rustc_legacy_const_generics(2)]
22975#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22976pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22977 unsafe {
22978 let r: __m128 = _mm_permute_ps::<MASK>(a);
22979 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
22980 }
22981}
22982
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each bit of MASK selects element 0 or 1 within a 128-bit lane; two
        // bits per lane, with +2/+4/+6 offsets addressing the three upper
        // lanes, so the shuffle never crosses lane boundaries.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1),
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 2,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 4,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 6,
            ],
        )
    }
}
23011
23012/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23013///
23014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
23015#[inline]
23016#[target_feature(enable = "avx512f")]
23017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23018#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23019#[rustc_legacy_const_generics(3)]
23020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23021pub const fn _mm512_mask_permute_pd<const MASK: i32>(
23022 src: __m512d,
23023 k: __mmask8,
23024 a: __m512d,
23025) -> __m512d {
23026 unsafe {
23027 static_assert_uimm_bits!(MASK, 8);
23028 let r: __m512d = _mm512_permute_pd::<MASK>(a);
23029 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
23030 }
23031}
23032
23033/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23034///
23035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
23036#[inline]
23037#[target_feature(enable = "avx512f")]
23038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23039#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23040#[rustc_legacy_const_generics(2)]
23041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23042pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23043 unsafe {
23044 static_assert_uimm_bits!(MASK, 8);
23045 let r: __m512d = _mm512_permute_pd::<MASK>(a);
23046 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
23047 }
23048}
23049
23050/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
23053#[inline]
23054#[target_feature(enable = "avx512f,avx512vl")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23057#[rustc_legacy_const_generics(3)]
23058#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23059pub const fn _mm256_mask_permute_pd<const MASK: i32>(
23060 src: __m256d,
23061 k: __mmask8,
23062 a: __m256d,
23063) -> __m256d {
23064 unsafe {
23065 static_assert_uimm_bits!(MASK, 4);
23066 let r: __m256d = _mm256_permute_pd::<MASK>(a);
23067 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
23068 }
23069}
23070
23071/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23072///
23073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
23074#[inline]
23075#[target_feature(enable = "avx512f,avx512vl")]
23076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23077#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
23078#[rustc_legacy_const_generics(2)]
23079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23080pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23081 unsafe {
23082 static_assert_uimm_bits!(MASK, 4);
23083 let r: __m256d = _mm256_permute_pd::<MASK>(a);
23084 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
23085 }
23086}
23087
23088/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23089///
23090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
23091#[inline]
23092#[target_feature(enable = "avx512f,avx512vl")]
23093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23094#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23095#[rustc_legacy_const_generics(3)]
23096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23097pub const fn _mm_mask_permute_pd<const IMM2: i32>(
23098 src: __m128d,
23099 k: __mmask8,
23100 a: __m128d,
23101) -> __m128d {
23102 unsafe {
23103 static_assert_uimm_bits!(IMM2, 2);
23104 let r: __m128d = _mm_permute_pd::<IMM2>(a);
23105 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:src.as_f64x2()))
23106 }
23107}
23108
23109/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23110///
23111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
23112#[inline]
23113#[target_feature(enable = "avx512f,avx512vl")]
23114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23115#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
23116#[rustc_legacy_const_generics(2)]
23117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23118pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
23119 unsafe {
23120 static_assert_uimm_bits!(IMM2, 2);
23121 let r: __m128d = _mm_permute_pd::<IMM2>(a);
23122 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:f64x2::ZERO))
23123 }
23124}
23125
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 i64 elements within a
        // 256-bit lane; the +4 offsets replay the same selection in the upper
        // 256-bit lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23154
23155/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23156///
23157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
23158#[inline]
23159#[target_feature(enable = "avx512f")]
23160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23161#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23162#[rustc_legacy_const_generics(3)]
23163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23164pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
23165 src: __m512i,
23166 k: __mmask8,
23167 a: __m512i,
23168) -> __m512i {
23169 unsafe {
23170 static_assert_uimm_bits!(MASK, 8);
23171 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
23172 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
23173 }
23174}
23175
23176/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23177///
23178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
23179#[inline]
23180#[target_feature(enable = "avx512f")]
23181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23182#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23183#[rustc_legacy_const_generics(2)]
23184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23185pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
23186 unsafe {
23187 static_assert_uimm_bits!(MASK, 8);
23188 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
23189 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
23190 }
23191}
23192
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 i64 elements of the
        // single 256-bit vector.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23217
23218/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23219///
23220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
23221#[inline]
23222#[target_feature(enable = "avx512f,avx512vl")]
23223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23224#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23225#[rustc_legacy_const_generics(3)]
23226#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23227pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
23228 src: __m256i,
23229 k: __mmask8,
23230 a: __m256i,
23231) -> __m256i {
23232 unsafe {
23233 static_assert_uimm_bits!(MASK, 8);
23234 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
23235 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
23236 }
23237}
23238
23239/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23240///
23241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
23242#[inline]
23243#[target_feature(enable = "avx512f,avx512vl")]
23244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23245#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
23246#[rustc_legacy_const_generics(2)]
23247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23248pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
23249 unsafe {
23250 static_assert_uimm_bits!(MASK, 8);
23251 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
23252 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
23253 }
23254}
23255
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 f64 elements within a
        // 256-bit lane; the +4 offsets replay the same selection in the upper
        // 256-bit lane.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}
23284
23285/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23286///
23287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
23288#[inline]
23289#[target_feature(enable = "avx512f")]
23290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23291#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23292#[rustc_legacy_const_generics(3)]
23293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23294pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
23295 src: __m512d,
23296 k: __mmask8,
23297 a: __m512d,
23298) -> __m512d {
23299 unsafe {
23300 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
23301 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
23302 }
23303}
23304
23305/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23306///
23307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
23308#[inline]
23309#[target_feature(enable = "avx512f")]
23310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23311#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23312#[rustc_legacy_const_generics(2)]
23313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23314pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23315 unsafe {
23316 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
23317 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
23318 }
23319}
23320
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK picks one of the 4 f64 elements of the
        // single 256-bit vector.
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
            ],
        )
    }
}
23345
23346/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23347///
23348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
23349#[inline]
23350#[target_feature(enable = "avx512f,avx512vl")]
23351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23352#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23353#[rustc_legacy_const_generics(3)]
23354#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23355pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
23356 src: __m256d,
23357 k: __mmask8,
23358 a: __m256d,
23359) -> __m256d {
23360 unsafe {
23361 static_assert_uimm_bits!(MASK, 8);
23362 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
23363 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
23364 }
23365}
23366
23367/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23368///
23369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
23370#[inline]
23371#[target_feature(enable = "avx512f,avx512vl")]
23372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23373#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
23374#[rustc_legacy_const_generics(2)]
23375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23376pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
23377 unsafe {
23378 static_assert_uimm_bits!(MASK, 8);
23379 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
23380 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
23381 }
23382}
23383
23384/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23385///
23386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
23387#[inline]
23388#[target_feature(enable = "avx512f")]
23389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23390#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23391pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23392 unsafe { transmute(src:vpermd(a.as_i32x16(), idx.as_i32x16())) }
23393}
23394
23395/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
23396///
23397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
23398#[inline]
23399#[target_feature(enable = "avx512f")]
23400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23401#[cfg_attr(test, assert_instr(vpermd))]
23402pub fn _mm512_mask_permutevar_epi32(
23403 src: __m512i,
23404 k: __mmask16,
23405 idx: __m512i,
23406 a: __m512i,
23407) -> __m512i {
23408 unsafe {
23409 let permute: Simd = _mm512_permutevar_epi32(idx, a).as_i32x16();
23410 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x16()))
23411 }
23412}
23413
23414/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23415///
23416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
23417#[inline]
23418#[target_feature(enable = "avx512f")]
23419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23420#[cfg_attr(test, assert_instr(vpermilps))]
23421pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
23422 unsafe { transmute(src:vpermilps(a.as_f32x16(), b.as_i32x16())) }
23423}
23424
23425/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23426///
23427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
23428#[inline]
23429#[target_feature(enable = "avx512f")]
23430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23431#[cfg_attr(test, assert_instr(vpermilps))]
23432pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23433 unsafe {
23434 let permute: Simd = _mm512_permutevar_ps(a, b).as_f32x16();
23435 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x16()))
23436 }
23437}
23438
23439/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23440///
23441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
23442#[inline]
23443#[target_feature(enable = "avx512f")]
23444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23445#[cfg_attr(test, assert_instr(vpermilps))]
23446pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
23447 unsafe {
23448 let permute: Simd = _mm512_permutevar_ps(a, b).as_f32x16();
23449 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
23450 }
23451}
23452
23453/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23454///
23455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
23456#[inline]
23457#[target_feature(enable = "avx512f,avx512vl")]
23458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23459#[cfg_attr(test, assert_instr(vpermilps))]
23460pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23461 unsafe {
23462 let permute: Simd = _mm256_permutevar_ps(a, b).as_f32x8();
23463 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x8()))
23464 }
23465}
23466
23467/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23468///
23469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
23470#[inline]
23471#[target_feature(enable = "avx512f,avx512vl")]
23472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23473#[cfg_attr(test, assert_instr(vpermilps))]
23474pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
23475 unsafe {
23476 let permute: Simd = _mm256_permutevar_ps(a, b).as_f32x8();
23477 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
23478 }
23479}
23480
23481/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23482///
23483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
23484#[inline]
23485#[target_feature(enable = "avx512f,avx512vl")]
23486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23487#[cfg_attr(test, assert_instr(vpermilps))]
23488pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23489 unsafe {
23490 let permute: Simd = _mm_permutevar_ps(a, b).as_f32x4();
23491 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x4()))
23492 }
23493}
23494
23495/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23496///
23497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
23498#[inline]
23499#[target_feature(enable = "avx512f,avx512vl")]
23500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23501#[cfg_attr(test, assert_instr(vpermilps))]
23502pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
23503 unsafe {
23504 let permute: Simd = _mm_permutevar_ps(a, b).as_f32x4();
23505 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x4::ZERO))
23506 }
23507}
23508
23509/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
23510///
23511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
23512#[inline]
23513#[target_feature(enable = "avx512f")]
23514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23515#[cfg_attr(test, assert_instr(vpermilpd))]
23516pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
23517 unsafe { transmute(src:vpermilpd(a.as_f64x8(), b.as_i64x8())) }
23518}
23519
23520/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23521///
23522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
23523#[inline]
23524#[target_feature(enable = "avx512f")]
23525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23526#[cfg_attr(test, assert_instr(vpermilpd))]
23527pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23528 unsafe {
23529 let permute: Simd = _mm512_permutevar_pd(a, b).as_f64x8();
23530 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x8()))
23531 }
23532}
23533
23534/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23535///
23536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
23537#[inline]
23538#[target_feature(enable = "avx512f")]
23539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23540#[cfg_attr(test, assert_instr(vpermilpd))]
23541pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
23542 unsafe {
23543 let permute: Simd = _mm512_permutevar_pd(a, b).as_f64x8();
23544 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
23545 }
23546}
23547
23548/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23549///
23550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
23551#[inline]
23552#[target_feature(enable = "avx512f,avx512vl")]
23553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23554#[cfg_attr(test, assert_instr(vpermilpd))]
23555pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23556 unsafe {
23557 let permute: Simd = _mm256_permutevar_pd(a, b).as_f64x4();
23558 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x4()))
23559 }
23560}
23561
23562/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23563///
23564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
23565#[inline]
23566#[target_feature(enable = "avx512f,avx512vl")]
23567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23568#[cfg_attr(test, assert_instr(vpermilpd))]
23569pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
23570 unsafe {
23571 let permute: Simd = _mm256_permutevar_pd(a, b).as_f64x4();
23572 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
23573 }
23574}
23575
23576/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23577///
23578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
23579#[inline]
23580#[target_feature(enable = "avx512f,avx512vl")]
23581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23582#[cfg_attr(test, assert_instr(vpermilpd))]
23583pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23584 unsafe {
23585 let permute: Simd = _mm_permutevar_pd(a, b).as_f64x2();
23586 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x2()))
23587 }
23588}
23589
23590/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23591///
23592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
23593#[inline]
23594#[target_feature(enable = "avx512f,avx512vl")]
23595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23596#[cfg_attr(test, assert_instr(vpermilpd))]
23597pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
23598 unsafe {
23599 let permute: Simd = _mm_permutevar_pd(a, b).as_f64x2();
23600 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x2::ZERO))
23601 }
23602}
23603
23604/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
23607#[inline]
23608#[target_feature(enable = "avx512f")]
23609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23610#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23611pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
23612 unsafe { transmute(src:vpermd(a.as_i32x16(), idx.as_i32x16())) }
23613}
23614
23615/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23616///
23617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
23618#[inline]
23619#[target_feature(enable = "avx512f")]
23620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23621#[cfg_attr(test, assert_instr(vpermd))]
23622pub fn _mm512_mask_permutexvar_epi32(
23623 src: __m512i,
23624 k: __mmask16,
23625 idx: __m512i,
23626 a: __m512i,
23627) -> __m512i {
23628 unsafe {
23629 let permute: Simd = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23630 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x16()))
23631 }
23632}
23633
23634/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23635///
23636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
23637#[inline]
23638#[target_feature(enable = "avx512f")]
23639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23640#[cfg_attr(test, assert_instr(vpermd))]
23641pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
23642 unsafe {
23643 let permute: Simd = _mm512_permutexvar_epi32(idx, a).as_i32x16();
23644 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x16::ZERO))
23645 }
23646}
23647
23648/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23649///
23650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
23651#[inline]
23652#[target_feature(enable = "avx512f,avx512vl")]
23653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23654#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
23655pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
23656 _mm256_permutevar8x32_epi32(a, b:idx) // llvm use llvm.x86.avx2.permd
23657}
23658
23659/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23660///
23661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
23662#[inline]
23663#[target_feature(enable = "avx512f,avx512vl")]
23664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23665#[cfg_attr(test, assert_instr(vpermd))]
23666pub fn _mm256_mask_permutexvar_epi32(
23667 src: __m256i,
23668 k: __mmask8,
23669 idx: __m256i,
23670 a: __m256i,
23671) -> __m256i {
23672 unsafe {
23673 let permute: Simd = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23674 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i32x8()))
23675 }
23676}
23677
23678/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermd))]
23685pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23686 unsafe {
23687 let permute: Simd = _mm256_permutexvar_epi32(idx, a).as_i32x8();
23688 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x8::ZERO))
23689 }
23690}
23691
23692/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
23695#[inline]
23696#[target_feature(enable = "avx512f")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23699pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
23700 unsafe { transmute(src:vpermq(a.as_i64x8(), idx.as_i64x8())) }
23701}
23702
23703/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23704///
23705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
23706#[inline]
23707#[target_feature(enable = "avx512f")]
23708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23709#[cfg_attr(test, assert_instr(vpermq))]
23710pub fn _mm512_mask_permutexvar_epi64(
23711 src: __m512i,
23712 k: __mmask8,
23713 idx: __m512i,
23714 a: __m512i,
23715) -> __m512i {
23716 unsafe {
23717 let permute: Simd = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23718 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i64x8()))
23719 }
23720}
23721
23722/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23723///
23724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
23725#[inline]
23726#[target_feature(enable = "avx512f")]
23727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23728#[cfg_attr(test, assert_instr(vpermq))]
23729pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
23730 unsafe {
23731 let permute: Simd = _mm512_permutexvar_epi64(idx, a).as_i64x8();
23732 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x8::ZERO))
23733 }
23734}
23735
23736/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
23737///
23738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
23739#[inline]
23740#[target_feature(enable = "avx512f,avx512vl")]
23741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23742#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
23743pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
23744 unsafe { transmute(src:vpermq256(a.as_i64x4(), idx.as_i64x4())) }
23745}
23746
23747/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23748///
23749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
23750#[inline]
23751#[target_feature(enable = "avx512f,avx512vl")]
23752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23753#[cfg_attr(test, assert_instr(vpermq))]
23754pub fn _mm256_mask_permutexvar_epi64(
23755 src: __m256i,
23756 k: __mmask8,
23757 idx: __m256i,
23758 a: __m256i,
23759) -> __m256i {
23760 unsafe {
23761 let permute: Simd = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23762 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_i64x4()))
23763 }
23764}
23765
23766/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23772#[cfg_attr(test, assert_instr(vpermq))]
23773pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23774 unsafe {
23775 let permute: Simd = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23776 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x4::ZERO))
23777 }
23778}
23779
23780/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
23783#[inline]
23784#[target_feature(enable = "avx512f")]
23785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23786#[cfg_attr(test, assert_instr(vpermps))]
23787pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
23788 unsafe { transmute(src:vpermps(a.as_f32x16(), idx.as_i32x16())) }
23789}
23790
23791/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23792///
23793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23794#[inline]
23795#[target_feature(enable = "avx512f")]
23796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23797#[cfg_attr(test, assert_instr(vpermps))]
23798pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23799 unsafe {
23800 let permute: Simd = _mm512_permutexvar_ps(idx, a).as_f32x16();
23801 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x16()))
23802 }
23803}
23804
23805/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23806///
23807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23808#[inline]
23809#[target_feature(enable = "avx512f")]
23810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23811#[cfg_attr(test, assert_instr(vpermps))]
23812pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23813 unsafe {
23814 let permute: Simd = _mm512_permutexvar_ps(idx, a).as_f32x16();
23815 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
23816 }
23817}
23818
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
    // Same operation as the AVX2 cross-lane shuffle; lowered via llvm.x86.avx2.permps.
    _mm256_permutevar8x32_ps(a, idx)
}
23829
23830/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23831///
23832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23833#[inline]
23834#[target_feature(enable = "avx512f,avx512vl")]
23835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23836#[cfg_attr(test, assert_instr(vpermps))]
23837pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23838 unsafe {
23839 let permute: Simd = _mm256_permutexvar_ps(idx, a).as_f32x8();
23840 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f32x8()))
23841 }
23842}
23843
23844/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23845///
23846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23847#[inline]
23848#[target_feature(enable = "avx512f,avx512vl")]
23849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23850#[cfg_attr(test, assert_instr(vpermps))]
23851pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23852 unsafe {
23853 let permute: Simd = _mm256_permutexvar_ps(idx, a).as_f32x8();
23854 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
23855 }
23856}
23857
23858/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23859///
23860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23861#[inline]
23862#[target_feature(enable = "avx512f")]
23863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23864#[cfg_attr(test, assert_instr(vpermpd))]
23865pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23866 unsafe { transmute(src:vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23867}
23868
23869/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23870///
23871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23872#[inline]
23873#[target_feature(enable = "avx512f")]
23874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23875#[cfg_attr(test, assert_instr(vpermpd))]
23876pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23877 unsafe {
23878 let permute: Simd = _mm512_permutexvar_pd(idx, a).as_f64x8();
23879 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x8()))
23880 }
23881}
23882
23883/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vpermpd))]
23890pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23891 unsafe {
23892 let permute: Simd = _mm512_permutexvar_pd(idx, a).as_f64x8();
23893 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
23894 }
23895}
23896
23897/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23898///
23899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23900#[inline]
23901#[target_feature(enable = "avx512f,avx512vl")]
23902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23903#[cfg_attr(test, assert_instr(vpermpd))]
23904pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23905 unsafe { transmute(src:vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23906}
23907
23908/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23909///
23910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23911#[inline]
23912#[target_feature(enable = "avx512f,avx512vl")]
23913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23914#[cfg_attr(test, assert_instr(vpermpd))]
23915pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23916 unsafe {
23917 let permute: Simd = _mm256_permutexvar_pd(idx, a).as_f64x4();
23918 transmute(src:simd_select_bitmask(m:k, yes:permute, no:src.as_f64x4()))
23919 }
23920}
23921
23922/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23923///
23924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23925#[inline]
23926#[target_feature(enable = "avx512f,avx512vl")]
23927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23928#[cfg_attr(test, assert_instr(vpermpd))]
23929pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23930 unsafe {
23931 let permute: Simd = _mm256_permutexvar_pd(idx, a).as_f64x4();
23932 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
23933 }
23934}
23935
23936/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23937///
23938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23939#[inline]
23940#[target_feature(enable = "avx512f")]
23941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23942#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23943pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23944 unsafe { transmute(src:vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23945}
23946
23947/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23948///
23949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23950#[inline]
23951#[target_feature(enable = "avx512f")]
23952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23953#[cfg_attr(test, assert_instr(vpermt2d))]
23954pub fn _mm512_mask_permutex2var_epi32(
23955 a: __m512i,
23956 k: __mmask16,
23957 idx: __m512i,
23958 b: __m512i,
23959) -> __m512i {
23960 unsafe {
23961 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23962 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x16()))
23963 }
23964}
23965
23966/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23967///
23968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23969#[inline]
23970#[target_feature(enable = "avx512f")]
23971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23972#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23973pub fn _mm512_maskz_permutex2var_epi32(
23974 k: __mmask16,
23975 a: __m512i,
23976 idx: __m512i,
23977 b: __m512i,
23978) -> __m512i {
23979 unsafe {
23980 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23981 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x16::ZERO))
23982 }
23983}
23984
23985/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23986///
23987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23988#[inline]
23989#[target_feature(enable = "avx512f")]
23990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23991#[cfg_attr(test, assert_instr(vpermi2d))]
23992pub fn _mm512_mask2_permutex2var_epi32(
23993 a: __m512i,
23994 idx: __m512i,
23995 k: __mmask16,
23996 b: __m512i,
23997) -> __m512i {
23998 unsafe {
23999 let permute: Simd = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
24000 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x16()))
24001 }
24002}
24003
24004/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24005///
24006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
24007#[inline]
24008#[target_feature(enable = "avx512f,avx512vl")]
24009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24010#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24011pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24012 unsafe { transmute(src:vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
24013}
24014
24015/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24016///
24017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
24018#[inline]
24019#[target_feature(enable = "avx512f,avx512vl")]
24020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24021#[cfg_attr(test, assert_instr(vpermt2d))]
24022pub fn _mm256_mask_permutex2var_epi32(
24023 a: __m256i,
24024 k: __mmask8,
24025 idx: __m256i,
24026 b: __m256i,
24027) -> __m256i {
24028 unsafe {
24029 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24030 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x8()))
24031 }
24032}
24033
24034/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24035///
24036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
24037#[inline]
24038#[target_feature(enable = "avx512f,avx512vl")]
24039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24040#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24041pub fn _mm256_maskz_permutex2var_epi32(
24042 k: __mmask8,
24043 a: __m256i,
24044 idx: __m256i,
24045 b: __m256i,
24046) -> __m256i {
24047 unsafe {
24048 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24049 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x8::ZERO))
24050 }
24051}
24052
24053/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24054///
24055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
24056#[inline]
24057#[target_feature(enable = "avx512f,avx512vl")]
24058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24059#[cfg_attr(test, assert_instr(vpermi2d))]
24060pub fn _mm256_mask2_permutex2var_epi32(
24061 a: __m256i,
24062 idx: __m256i,
24063 k: __mmask8,
24064 b: __m256i,
24065) -> __m256i {
24066 unsafe {
24067 let permute: Simd = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
24068 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x8()))
24069 }
24070}
24071
24072/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24073///
24074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
24075#[inline]
24076#[target_feature(enable = "avx512f,avx512vl")]
24077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24078#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24079pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24080 unsafe { transmute(src:vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
24081}
24082
24083/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24084///
24085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
24086#[inline]
24087#[target_feature(enable = "avx512f,avx512vl")]
24088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24089#[cfg_attr(test, assert_instr(vpermt2d))]
24090pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24091 unsafe {
24092 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24093 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i32x4()))
24094 }
24095}
24096
24097/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24098///
24099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
24100#[inline]
24101#[target_feature(enable = "avx512f,avx512vl")]
24102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24103#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
24104pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24105 unsafe {
24106 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24107 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i32x4::ZERO))
24108 }
24109}
24110
24111/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24112///
24113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
24114#[inline]
24115#[target_feature(enable = "avx512f,avx512vl")]
24116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24117#[cfg_attr(test, assert_instr(vpermi2d))]
24118pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24119 unsafe {
24120 let permute: Simd = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
24121 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i32x4()))
24122 }
24123}
24124
24125/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24126///
24127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
24128#[inline]
24129#[target_feature(enable = "avx512f")]
24130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24131#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24132pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
24133 unsafe { transmute(src:vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
24134}
24135
24136/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24137///
24138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
24139#[inline]
24140#[target_feature(enable = "avx512f")]
24141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24142#[cfg_attr(test, assert_instr(vpermt2q))]
24143pub fn _mm512_mask_permutex2var_epi64(
24144 a: __m512i,
24145 k: __mmask8,
24146 idx: __m512i,
24147 b: __m512i,
24148) -> __m512i {
24149 unsafe {
24150 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24151 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x8()))
24152 }
24153}
24154
24155/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24156///
24157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
24158#[inline]
24159#[target_feature(enable = "avx512f")]
24160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24161#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24162pub fn _mm512_maskz_permutex2var_epi64(
24163 k: __mmask8,
24164 a: __m512i,
24165 idx: __m512i,
24166 b: __m512i,
24167) -> __m512i {
24168 unsafe {
24169 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24170 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x8::ZERO))
24171 }
24172}
24173
24174/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24175///
24176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
24177#[inline]
24178#[target_feature(enable = "avx512f")]
24179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24180#[cfg_attr(test, assert_instr(vpermi2q))]
24181pub fn _mm512_mask2_permutex2var_epi64(
24182 a: __m512i,
24183 idx: __m512i,
24184 k: __mmask8,
24185 b: __m512i,
24186) -> __m512i {
24187 unsafe {
24188 let permute: Simd = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
24189 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x8()))
24190 }
24191}
24192
24193/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24194///
24195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
24196#[inline]
24197#[target_feature(enable = "avx512f,avx512vl")]
24198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24199#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24200pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
24201 unsafe { transmute(src:vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
24202}
24203
24204/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24205///
24206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
24207#[inline]
24208#[target_feature(enable = "avx512f,avx512vl")]
24209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24210#[cfg_attr(test, assert_instr(vpermt2q))]
24211pub fn _mm256_mask_permutex2var_epi64(
24212 a: __m256i,
24213 k: __mmask8,
24214 idx: __m256i,
24215 b: __m256i,
24216) -> __m256i {
24217 unsafe {
24218 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24219 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x4()))
24220 }
24221}
24222
24223/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24224///
24225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
24226#[inline]
24227#[target_feature(enable = "avx512f,avx512vl")]
24228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24229#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24230pub fn _mm256_maskz_permutex2var_epi64(
24231 k: __mmask8,
24232 a: __m256i,
24233 idx: __m256i,
24234 b: __m256i,
24235) -> __m256i {
24236 unsafe {
24237 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24238 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x4::ZERO))
24239 }
24240}
24241
24242/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24248#[cfg_attr(test, assert_instr(vpermi2q))]
24249pub fn _mm256_mask2_permutex2var_epi64(
24250 a: __m256i,
24251 idx: __m256i,
24252 k: __mmask8,
24253 b: __m256i,
24254) -> __m256i {
24255 unsafe {
24256 let permute: Simd = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24257 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x4()))
24258 }
24259}
24260
24261/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24268pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24269 unsafe { transmute(src:vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
24270}
24271
24272/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24273///
24274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
24275#[inline]
24276#[target_feature(enable = "avx512f,avx512vl")]
24277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24278#[cfg_attr(test, assert_instr(vpermt2q))]
24279pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24280 unsafe {
24281 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24282 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_i64x2()))
24283 }
24284}
24285
24286/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24287///
24288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
24289#[inline]
24290#[target_feature(enable = "avx512f,avx512vl")]
24291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24292#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24293pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24294 unsafe {
24295 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24296 transmute(src:simd_select_bitmask(m:k, yes:permute, no:i64x2::ZERO))
24297 }
24298}
24299
24300/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
24303#[inline]
24304#[target_feature(enable = "avx512f,avx512vl")]
24305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24306#[cfg_attr(test, assert_instr(vpermi2q))]
24307pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24308 unsafe {
24309 let permute: Simd = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24310 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx.as_i64x2()))
24311 }
24312}
24313
24314/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24321pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
24322 unsafe { transmute(src:vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
24323}
24324
24325/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24326///
24327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
24328#[inline]
24329#[target_feature(enable = "avx512f")]
24330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24331#[cfg_attr(test, assert_instr(vpermt2ps))]
24332pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
24333 unsafe {
24334 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24335 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x16()))
24336 }
24337}
24338
24339/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24340///
24341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
24342#[inline]
24343#[target_feature(enable = "avx512f")]
24344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24345#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24346pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
24347 unsafe {
24348 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24349 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x16::ZERO))
24350 }
24351}
24352
24353/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24360pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
24361 unsafe {
24362 let permute: Simd = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24363 let idx: Simd = _mm512_castsi512_ps(idx).as_f32x16();
24364 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24365 }
24366}
24367
24368/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24369///
24370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
24371#[inline]
24372#[target_feature(enable = "avx512f,avx512vl")]
24373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24374#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24375pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
24376 unsafe { transmute(src:vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
24377}
24378
24379/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24380///
24381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
24382#[inline]
24383#[target_feature(enable = "avx512f,avx512vl")]
24384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24385#[cfg_attr(test, assert_instr(vpermt2ps))]
24386pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
24387 unsafe {
24388 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24389 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x8()))
24390 }
24391}
24392
24393/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24394///
24395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
24396#[inline]
24397#[target_feature(enable = "avx512f,avx512vl")]
24398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24399#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24400pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
24401 unsafe {
24402 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24403 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x8::ZERO))
24404 }
24405}
24406
24407/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24408///
24409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
24410#[inline]
24411#[target_feature(enable = "avx512f,avx512vl")]
24412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24413#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24414pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
24415 unsafe {
24416 let permute: Simd = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24417 let idx: Simd = _mm256_castsi256_ps(idx).as_f32x8();
24418 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24419 }
24420}
24421
24422/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24423///
24424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
24425#[inline]
24426#[target_feature(enable = "avx512f,avx512vl")]
24427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24428#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24429pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
24430 unsafe { transmute(src:vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
24431}
24432
24433/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24434///
24435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
24436#[inline]
24437#[target_feature(enable = "avx512f,avx512vl")]
24438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24439#[cfg_attr(test, assert_instr(vpermt2ps))]
24440pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
24441 unsafe {
24442 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24443 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f32x4()))
24444 }
24445}
24446
24447/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24448///
24449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
24450#[inline]
24451#[target_feature(enable = "avx512f,avx512vl")]
24452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24453#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24454pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
24455 unsafe {
24456 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24457 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f32x4::ZERO))
24458 }
24459}
24460
24461/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24462///
24463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
24464#[inline]
24465#[target_feature(enable = "avx512f,avx512vl")]
24466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24467#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24468pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
24469 unsafe {
24470 let permute: Simd = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24471 let idx: Simd = _mm_castsi128_ps(idx).as_f32x4();
24472 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24473 }
24474}
24475
24476/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24477///
24478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
24479#[inline]
24480#[target_feature(enable = "avx512f")]
24481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24482#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24483pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24484 unsafe { transmute(src:vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
24485}
24486
24487/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24488///
24489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
24490#[inline]
24491#[target_feature(enable = "avx512f")]
24492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24493#[cfg_attr(test, assert_instr(vpermt2pd))]
24494pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
24495 unsafe {
24496 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24497 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x8()))
24498 }
24499}
24500
24501/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24502///
24503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
24504#[inline]
24505#[target_feature(enable = "avx512f")]
24506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24507#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24508pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24509 unsafe {
24510 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24511 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x8::ZERO))
24512 }
24513}
24514
24515/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24516///
24517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
24518#[inline]
24519#[target_feature(enable = "avx512f")]
24520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24521#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24522pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
24523 unsafe {
24524 let permute: Simd = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24525 let idx: Simd = _mm512_castsi512_pd(idx).as_f64x8();
24526 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24527 }
24528}
24529
24530/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24536#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24537pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24538 unsafe { transmute(src:vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
24539}
24540
24541/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24542///
24543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
24544#[inline]
24545#[target_feature(enable = "avx512f,avx512vl")]
24546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24547#[cfg_attr(test, assert_instr(vpermt2pd))]
24548pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
24549 unsafe {
24550 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24551 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x4()))
24552 }
24553}
24554
24555/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24556///
24557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
24558#[inline]
24559#[target_feature(enable = "avx512f,avx512vl")]
24560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24561#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24562pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24563 unsafe {
24564 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24565 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x4::ZERO))
24566 }
24567}
24568
24569/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24570///
24571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
24572#[inline]
24573#[target_feature(enable = "avx512f,avx512vl")]
24574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24575#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24576pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
24577 unsafe {
24578 let permute: Simd = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24579 let idx: Simd = _mm256_castsi256_pd(idx).as_f64x4();
24580 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24581 }
24582}
24583
24584/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24585///
24586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
24587#[inline]
24588#[target_feature(enable = "avx512f,avx512vl")]
24589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24590#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24591pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24592 unsafe { transmute(src:vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
24593}
24594
24595/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24596///
24597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
24598#[inline]
24599#[target_feature(enable = "avx512f,avx512vl")]
24600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24601#[cfg_attr(test, assert_instr(vpermt2pd))]
24602pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
24603 unsafe {
24604 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24605 transmute(src:simd_select_bitmask(m:k, yes:permute, no:a.as_f64x2()))
24606 }
24607}
24608
24609/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24610///
24611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
24612#[inline]
24613#[target_feature(enable = "avx512f,avx512vl")]
24614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24615#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24616pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24617 unsafe {
24618 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24619 transmute(src:simd_select_bitmask(m:k, yes:permute, no:f64x2::ZERO))
24620 }
24621}
24622
24623/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
24624///
24625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
24626#[inline]
24627#[target_feature(enable = "avx512f,avx512vl")]
24628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24629#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24630pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
24631 unsafe {
24632 let permute: Simd = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24633 let idx: Simd = _mm_castsi128_pd(idx).as_f64x2();
24634 transmute(src:simd_select_bitmask(m:k, yes:permute, no:idx))
24635 }
24636}
24637
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Each 2-bit field of MASK selects one of the four 32-bit elements of a
        // 128-bit lane; the same pattern is repeated for all four lanes, hence
        // the +4/+8/+12 offsets on the shuffle indices below.
        let r: i32x16 = simd_shuffle!(
            a.as_i32x16(),
            a.as_i32x16(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        );
        transmute(r)
    }
}
24675
24676/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24677///
24678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
24679#[inline]
24680#[target_feature(enable = "avx512f")]
24681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24682#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24683#[rustc_legacy_const_generics(3)]
24684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24685pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24686 src: __m512i,
24687 k: __mmask16,
24688 a: __m512i,
24689) -> __m512i {
24690 unsafe {
24691 static_assert_uimm_bits!(MASK, 8);
24692 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
24693 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
24694 }
24695}
24696
24697/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24698///
24699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
24700#[inline]
24701#[target_feature(enable = "avx512f")]
24702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24703#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24704#[rustc_legacy_const_generics(2)]
24705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24706pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24707 k: __mmask16,
24708 a: __m512i,
24709) -> __m512i {
24710 unsafe {
24711 static_assert_uimm_bits!(MASK, 8);
24712 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
24713 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
24714 }
24715}
24716
24717/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24718///
24719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
24720#[inline]
24721#[target_feature(enable = "avx512f,avx512vl")]
24722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24723#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24724#[rustc_legacy_const_generics(3)]
24725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24726pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24727 src: __m256i,
24728 k: __mmask8,
24729 a: __m256i,
24730) -> __m256i {
24731 unsafe {
24732 static_assert_uimm_bits!(MASK, 8);
24733 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
24734 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
24735 }
24736}
24737
24738/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
24741#[inline]
24742#[target_feature(enable = "avx512f,avx512vl")]
24743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24744#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24745#[rustc_legacy_const_generics(2)]
24746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24747pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24748 k: __mmask8,
24749 a: __m256i,
24750) -> __m256i {
24751 unsafe {
24752 static_assert_uimm_bits!(MASK, 8);
24753 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
24754 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
24755 }
24756}
24757
24758/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24759///
24760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
24761#[inline]
24762#[target_feature(enable = "avx512f,avx512vl")]
24763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24764#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24765#[rustc_legacy_const_generics(3)]
24766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24767pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24768 src: __m128i,
24769 k: __mmask8,
24770 a: __m128i,
24771) -> __m128i {
24772 unsafe {
24773 static_assert_uimm_bits!(MASK, 8);
24774 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
24775 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
24776 }
24777}
24778
24779/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24780///
24781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24782#[inline]
24783#[target_feature(enable = "avx512f,avx512vl")]
24784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24785#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24786#[rustc_legacy_const_generics(2)]
24787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24788pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24789 k: __mmask8,
24790 a: __m128i,
24791) -> __m128i {
24792 unsafe {
24793 static_assert_uimm_bits!(MASK, 8);
24794 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
24795 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
24796 }
24797}
24798
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // Per 128-bit lane: bits [1:0] and [3:2] of MASK pick two elements from
        // the lane of `a`, bits [5:4] and [7:6] pick two from the matching lane
        // of `b` (shuffle indices >= 16 refer to `b`); +4/+8/+12 step the lanes.
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 16,
                ((MASK as u32 >> 6) & 0b11) + 16,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 20,
                ((MASK as u32 >> 6) & 0b11) + 20,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 24,
                ((MASK as u32 >> 6) & 0b11) + 24,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 28,
                ((MASK as u32 >> 6) & 0b11) + 28,
            ],
        )
    }
}
24835
24836/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24837///
24838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24839#[inline]
24840#[target_feature(enable = "avx512f")]
24841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24842#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24843#[rustc_legacy_const_generics(4)]
24844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24845pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
24846 src: __m512,
24847 k: __mmask16,
24848 a: __m512,
24849 b: __m512,
24850) -> __m512 {
24851 unsafe {
24852 static_assert_uimm_bits!(MASK, 8);
24853 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
24854 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
24855 }
24856}
24857
24858/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24859///
24860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24861#[inline]
24862#[target_feature(enable = "avx512f")]
24863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24864#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24865#[rustc_legacy_const_generics(3)]
24866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24867pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
24868 k: __mmask16,
24869 a: __m512,
24870 b: __m512,
24871) -> __m512 {
24872 unsafe {
24873 static_assert_uimm_bits!(MASK, 8);
24874 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
24875 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
24876 }
24877}
24878
24879/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24880///
24881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24882#[inline]
24883#[target_feature(enable = "avx512f,avx512vl")]
24884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24885#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24886#[rustc_legacy_const_generics(4)]
24887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24888pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
24889 src: __m256,
24890 k: __mmask8,
24891 a: __m256,
24892 b: __m256,
24893) -> __m256 {
24894 unsafe {
24895 static_assert_uimm_bits!(MASK, 8);
24896 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
24897 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
24898 }
24899}
24900
24901/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24902///
24903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24904#[inline]
24905#[target_feature(enable = "avx512f,avx512vl")]
24906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24907#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24908#[rustc_legacy_const_generics(3)]
24909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24910pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24911 unsafe {
24912 static_assert_uimm_bits!(MASK, 8);
24913 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
24914 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
24915 }
24916}
24917
24918/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24919///
24920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24921#[inline]
24922#[target_feature(enable = "avx512f,avx512vl")]
24923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24924#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24925#[rustc_legacy_const_generics(4)]
24926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24927pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
24928 src: __m128,
24929 k: __mmask8,
24930 a: __m128,
24931 b: __m128,
24932) -> __m128 {
24933 unsafe {
24934 static_assert_uimm_bits!(MASK, 8);
24935 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
24936 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
24937 }
24938}
24939
24940/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24941///
24942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24943#[inline]
24944#[target_feature(enable = "avx512f,avx512vl")]
24945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24946#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24947#[rustc_legacy_const_generics(3)]
24948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24949pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24950 unsafe {
24951 static_assert_uimm_bits!(MASK, 8);
24952 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
24953 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
24954 }
24955}
24956
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        // One MASK bit per output element: even result slots take element 0 or 1
        // of the 128-bit lane of `a`, odd slots take element 0 or 1 of the lane
        // of `b` (shuffle indices >= 8 refer to `b`).
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 8,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 10,
                ((MASK as u32 >> 4) & 0b1) + 4,
                ((MASK as u32 >> 5) & 0b1) + 12,
                ((MASK as u32 >> 6) & 0b1) + 6,
                ((MASK as u32 >> 7) & 0b1) + 14,
            ],
        )
    }
}
24985
24986/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24987///
24988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24989#[inline]
24990#[target_feature(enable = "avx512f")]
24991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24992#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24993#[rustc_legacy_const_generics(4)]
24994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24995pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
24996 src: __m512d,
24997 k: __mmask8,
24998 a: __m512d,
24999 b: __m512d,
25000) -> __m512d {
25001 unsafe {
25002 static_assert_uimm_bits!(MASK, 8);
25003 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
25004 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
25005 }
25006}
25007
25008/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25009///
25010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
25011#[inline]
25012#[target_feature(enable = "avx512f")]
25013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25014#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25015#[rustc_legacy_const_generics(3)]
25016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25017pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
25018 k: __mmask8,
25019 a: __m512d,
25020 b: __m512d,
25021) -> __m512d {
25022 unsafe {
25023 static_assert_uimm_bits!(MASK, 8);
25024 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
25025 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
25026 }
25027}
25028
25029/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25030///
25031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
25032#[inline]
25033#[target_feature(enable = "avx512f,avx512vl")]
25034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25035#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25036#[rustc_legacy_const_generics(4)]
25037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25038pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
25039 src: __m256d,
25040 k: __mmask8,
25041 a: __m256d,
25042 b: __m256d,
25043) -> __m256d {
25044 unsafe {
25045 static_assert_uimm_bits!(MASK, 8);
25046 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
25047 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25048 }
25049}
25050
25051/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25052///
25053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
25054#[inline]
25055#[target_feature(enable = "avx512f,avx512vl")]
25056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25057#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25058#[rustc_legacy_const_generics(3)]
25059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25060pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
25061 k: __mmask8,
25062 a: __m256d,
25063 b: __m256d,
25064) -> __m256d {
25065 unsafe {
25066 static_assert_uimm_bits!(MASK, 8);
25067 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
25068 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25069 }
25070}
25071
25072/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25073///
25074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
25075#[inline]
25076#[target_feature(enable = "avx512f,avx512vl")]
25077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25078#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25079#[rustc_legacy_const_generics(4)]
25080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25081pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
25082 src: __m128d,
25083 k: __mmask8,
25084 a: __m128d,
25085 b: __m128d,
25086) -> __m128d {
25087 unsafe {
25088 static_assert_uimm_bits!(MASK, 8);
25089 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
25090 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:src.as_f64x2()))
25091 }
25092}
25093
25094/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25095///
25096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
25097#[inline]
25098#[target_feature(enable = "avx512f,avx512vl")]
25099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25100#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25101#[rustc_legacy_const_generics(3)]
25102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25103pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25104 unsafe {
25105 static_assert_uimm_bits!(MASK, 8);
25106 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
25107 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x2(), no:f64x2::ZERO))
25108 }
25109}
25110
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        // MASK bits [1:0] and [3:2] each select a 128-bit lane (4 consecutive
        // i32s) from `a`; bits [5:4] and [7:6] select a lane from `b` (shuffle
        // indices >= 16 refer to `b`).
        let r: i32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25150
25151/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25152///
25153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
25154#[inline]
25155#[target_feature(enable = "avx512f")]
25156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25157#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25158#[rustc_legacy_const_generics(4)]
25159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25160pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
25161 src: __m512i,
25162 k: __mmask16,
25163 a: __m512i,
25164 b: __m512i,
25165) -> __m512i {
25166 unsafe {
25167 static_assert_uimm_bits!(MASK, 8);
25168 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
25169 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
25170 }
25171}
25172
25173/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25174///
25175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
25176#[inline]
25177#[target_feature(enable = "avx512f")]
25178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25179#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25180#[rustc_legacy_const_generics(3)]
25181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25182pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
25183 k: __mmask16,
25184 a: __m512i,
25185 b: __m512i,
25186) -> __m512i {
25187 unsafe {
25188 static_assert_uimm_bits!(MASK, 8);
25189 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
25190 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
25191 }
25192}
25193
25194/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
25195///
25196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
25197#[inline]
25198#[target_feature(enable = "avx512f,avx512vl")]
25199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25200#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
25201#[rustc_legacy_const_generics(2)]
25202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25203pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25204 unsafe {
25205 static_assert_uimm_bits!(MASK, 8);
25206 let a: Simd = a.as_i32x8();
25207 let b: Simd = b.as_i32x8();
25208 let r: i32x8 = simd_shuffle!(
25209 a,
25210 b,
25211 [
25212 (MASK as u32 & 0b1) * 4 + 0,
25213 (MASK as u32 & 0b1) * 4 + 1,
25214 (MASK as u32 & 0b1) * 4 + 2,
25215 (MASK as u32 & 0b1) * 4 + 3,
25216 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25217 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25218 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25219 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25220 ],
25221 );
25222 transmute(src:r)
25223 }
25224}
25225
25226/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25227///
25228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
25229#[inline]
25230#[target_feature(enable = "avx512f,avx512vl")]
25231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25232#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25233#[rustc_legacy_const_generics(4)]
25234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25235pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
25236 src: __m256i,
25237 k: __mmask8,
25238 a: __m256i,
25239 b: __m256i,
25240) -> __m256i {
25241 unsafe {
25242 static_assert_uimm_bits!(MASK, 8);
25243 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
25244 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
25245 }
25246}
25247
25248/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25254#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25255#[rustc_legacy_const_generics(3)]
25256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25257pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
25258 k: __mmask8,
25259 a: __m256i,
25260 b: __m256i,
25261) -> __m256i {
25262 unsafe {
25263 static_assert_uimm_bits!(MASK, 8);
25264 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
25265 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
25266 }
25267}
25268
25269/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25270///
25271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
25272#[inline]
25273#[target_feature(enable = "avx512f")]
25274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25275#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25276#[rustc_legacy_const_generics(2)]
25277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25278pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
25279 unsafe {
25280 static_assert_uimm_bits!(MASK, 8);
25281 let a: Simd = a.as_i64x8();
25282 let b: Simd = b.as_i64x8();
25283 let r: i64x8 = simd_shuffle!(
25284 a,
25285 b,
25286 [
25287 (MASK as u32 & 0b11) * 2 + 0,
25288 (MASK as u32 & 0b11) * 2 + 1,
25289 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25290 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25291 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25292 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25293 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25294 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25295 ],
25296 );
25297 transmute(src:r)
25298 }
25299}
25300
25301/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25302///
25303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
25304#[inline]
25305#[target_feature(enable = "avx512f")]
25306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25307#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25308#[rustc_legacy_const_generics(4)]
25309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25310pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
25311 src: __m512i,
25312 k: __mmask8,
25313 a: __m512i,
25314 b: __m512i,
25315) -> __m512i {
25316 unsafe {
25317 static_assert_uimm_bits!(MASK, 8);
25318 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
25319 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
25320 }
25321}
25322
25323/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25324///
25325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
25326#[inline]
25327#[target_feature(enable = "avx512f")]
25328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25329#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25330#[rustc_legacy_const_generics(3)]
25331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25332pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
25333 k: __mmask8,
25334 a: __m512i,
25335 b: __m512i,
25336) -> __m512i {
25337 unsafe {
25338 static_assert_uimm_bits!(MASK, 8);
25339 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
25340 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
25341 }
25342}
25343
25344/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25345///
25346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
25347#[inline]
25348#[target_feature(enable = "avx512f,avx512vl")]
25349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25350#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
25351#[rustc_legacy_const_generics(2)]
25352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25353pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25354 unsafe {
25355 static_assert_uimm_bits!(MASK, 8);
25356 let a: Simd = a.as_i64x4();
25357 let b: Simd = b.as_i64x4();
25358 let r: i64x4 = simd_shuffle!(
25359 a,
25360 b,
25361 [
25362 (MASK as u32 & 0b1) * 2 + 0,
25363 (MASK as u32 & 0b1) * 2 + 1,
25364 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25365 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25366 ],
25367 );
25368 transmute(src:r)
25369 }
25370}
25371
25372/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25373///
25374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
25375#[inline]
25376#[target_feature(enable = "avx512f,avx512vl")]
25377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25378#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25379#[rustc_legacy_const_generics(4)]
25380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25381pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
25382 src: __m256i,
25383 k: __mmask8,
25384 a: __m256i,
25385 b: __m256i,
25386) -> __m256i {
25387 unsafe {
25388 static_assert_uimm_bits!(MASK, 8);
25389 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
25390 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
25391 }
25392}
25393
25394/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25401#[rustc_legacy_const_generics(3)]
25402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25403pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
25404 k: __mmask8,
25405 a: __m256i,
25406 b: __m256i,
25407) -> __m256i {
25408 unsafe {
25409 static_assert_uimm_bits!(MASK, 8);
25410 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
25411 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
25412 }
25413}
25414
25415/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25416///
25417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
25418#[inline]
25419#[target_feature(enable = "avx512f")]
25420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25421#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
25422#[rustc_legacy_const_generics(2)]
25423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // imm8 control must fit in 8 bits (four 2-bit lane selectors).
        static_assert_uimm_bits!(MASK, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        // Each 2-bit field of MASK selects one 128-bit lane (4 x f32). The two
        // low fields pick lanes of `a`; the two high fields pick lanes of `b`,
        // hence the `+ 16` offset into the concatenated [a, b] index space.
        let r: f32x16 = simd_shuffle!(
            a,
            b,
            [
                (MASK as u32 & 0b11) * 4 + 0,
                (MASK as u32 & 0b11) * 4 + 1,
                (MASK as u32 & 0b11) * 4 + 2,
                (MASK as u32 & 0b11) * 4 + 3,
                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
            ],
        );
        transmute(r)
    }
}
25454
25455/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25456///
25457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
25458#[inline]
25459#[target_feature(enable = "avx512f")]
25460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25461#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25462#[rustc_legacy_const_generics(4)]
25463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25464pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
25465 src: __m512,
25466 k: __mmask16,
25467 a: __m512,
25468 b: __m512,
25469) -> __m512 {
25470 unsafe {
25471 static_assert_uimm_bits!(MASK, 8);
25472 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
25473 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
25474 }
25475}
25476
25477/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25478///
25479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
25480#[inline]
25481#[target_feature(enable = "avx512f")]
25482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25483#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25484#[rustc_legacy_const_generics(3)]
25485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25486pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
25487 k: __mmask16,
25488 a: __m512,
25489 b: __m512,
25490) -> __m512 {
25491 unsafe {
25492 static_assert_uimm_bits!(MASK, 8);
25493 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
25494 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
25495 }
25496}
25497
25498/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25499///
25500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
25501#[inline]
25502#[target_feature(enable = "avx512f,avx512vl")]
25503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25504#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
25505#[rustc_legacy_const_generics(2)]
25506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25507pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
25508 unsafe {
25509 static_assert_uimm_bits!(MASK, 8);
25510 let a: Simd = a.as_f32x8();
25511 let b: Simd = b.as_f32x8();
25512 let r: f32x8 = simd_shuffle!(
25513 a,
25514 b,
25515 [
25516 (MASK as u32 & 0b1) * 4 + 0,
25517 (MASK as u32 & 0b1) * 4 + 1,
25518 (MASK as u32 & 0b1) * 4 + 2,
25519 (MASK as u32 & 0b1) * 4 + 3,
25520 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25521 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25522 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25523 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25524 ],
25525 );
25526 transmute(src:r)
25527 }
25528}
25529
25530/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25531///
25532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
25533#[inline]
25534#[target_feature(enable = "avx512f,avx512vl")]
25535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25536#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25537#[rustc_legacy_const_generics(4)]
25538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25539pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
25540 src: __m256,
25541 k: __mmask8,
25542 a: __m256,
25543 b: __m256,
25544) -> __m256 {
25545 unsafe {
25546 static_assert_uimm_bits!(MASK, 8);
25547 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
25548 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
25549 }
25550}
25551
25552/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25553///
25554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
25555#[inline]
25556#[target_feature(enable = "avx512f,avx512vl")]
25557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25558#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25559#[rustc_legacy_const_generics(3)]
25560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25561pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
25562 k: __mmask8,
25563 a: __m256,
25564 b: __m256,
25565) -> __m256 {
25566 unsafe {
25567 static_assert_uimm_bits!(MASK, 8);
25568 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
25569 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
25570 }
25571}
25572
25573/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25574///
25575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
25576#[inline]
25577#[target_feature(enable = "avx512f")]
25578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25579#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25580#[rustc_legacy_const_generics(2)]
25581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25582pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
25583 unsafe {
25584 static_assert_uimm_bits!(MASK, 8);
25585 let a: Simd = a.as_f64x8();
25586 let b: Simd = b.as_f64x8();
25587 let r: f64x8 = simd_shuffle!(
25588 a,
25589 b,
25590 [
25591 (MASK as u32 & 0b11) * 2 + 0,
25592 (MASK as u32 & 0b11) * 2 + 1,
25593 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25594 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25595 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25596 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25597 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25598 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25599 ],
25600 );
25601 transmute(src:r)
25602 }
25603}
25604
25605/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25606///
25607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
25608#[inline]
25609#[target_feature(enable = "avx512f")]
25610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25611#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25612#[rustc_legacy_const_generics(4)]
25613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25614pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
25615 src: __m512d,
25616 k: __mmask8,
25617 a: __m512d,
25618 b: __m512d,
25619) -> __m512d {
25620 unsafe {
25621 static_assert_uimm_bits!(MASK, 8);
25622 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
25623 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
25624 }
25625}
25626
25627/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25628///
25629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
25630#[inline]
25631#[target_feature(enable = "avx512f")]
25632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25633#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25634#[rustc_legacy_const_generics(3)]
25635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25636pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
25637 k: __mmask8,
25638 a: __m512d,
25639 b: __m512d,
25640) -> __m512d {
25641 unsafe {
25642 static_assert_uimm_bits!(MASK, 8);
25643 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
25644 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
25645 }
25646}
25647
25648/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25649///
25650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
25651#[inline]
25652#[target_feature(enable = "avx512f,avx512vl")]
25653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25654#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
25655#[rustc_legacy_const_generics(2)]
25656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25657pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
25658 unsafe {
25659 static_assert_uimm_bits!(MASK, 8);
25660 let a: Simd = a.as_f64x4();
25661 let b: Simd = b.as_f64x4();
25662 let r: f64x4 = simd_shuffle!(
25663 a,
25664 b,
25665 [
25666 (MASK as u32 & 0b1) * 2 + 0,
25667 (MASK as u32 & 0b1) * 2 + 1,
25668 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25669 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25670 ],
25671 );
25672 transmute(src:r)
25673 }
25674}
25675
25676/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25677///
25678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
25679#[inline]
25680#[target_feature(enable = "avx512f,avx512vl")]
25681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25682#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25683#[rustc_legacy_const_generics(4)]
25684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25685pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
25686 src: __m256d,
25687 k: __mmask8,
25688 a: __m256d,
25689 b: __m256d,
25690) -> __m256d {
25691 unsafe {
25692 static_assert_uimm_bits!(MASK, 8);
25693 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
25694 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25695 }
25696}
25697
25698/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25699///
25700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
25701#[inline]
25702#[target_feature(enable = "avx512f,avx512vl")]
25703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25704#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25705#[rustc_legacy_const_generics(3)]
25706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25707pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
25708 k: __mmask8,
25709 a: __m256d,
25710 b: __m256d,
25711) -> __m256d {
25712 unsafe {
25713 static_assert_uimm_bits!(MASK, 8);
25714 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
25715 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25716 }
25717}
25718
25719/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25720///
25721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
25722#[inline]
25723#[target_feature(enable = "avx512f")]
25724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25725#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25726#[rustc_legacy_const_generics(1)]
25727#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
    unsafe {
        // Only the low 2 bits of the immediate select a lane.
        static_assert_uimm_bits!(IMM8, 2);
        // Each arm extracts one aligned 128-bit lane (4 x f32). The second
        // shuffle operand only satisfies the two-input form of `simd_shuffle!`;
        // no index ever reads from it.
        match IMM8 & 0x3 {
            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
        }
    }
}
25739
25740/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25741///
25742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
25743#[inline]
25744#[target_feature(enable = "avx512f")]
25745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25746#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25747#[rustc_legacy_const_generics(3)]
25748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25749pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
25750 src: __m128,
25751 k: __mmask8,
25752 a: __m512,
25753) -> __m128 {
25754 unsafe {
25755 static_assert_uimm_bits!(IMM8, 2);
25756 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
25757 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
25758 }
25759}
25760
25761/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25762///
25763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
25764#[inline]
25765#[target_feature(enable = "avx512f")]
25766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25767#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25768#[rustc_legacy_const_generics(2)]
25769#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25770pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
25771 unsafe {
25772 static_assert_uimm_bits!(IMM8, 2);
25773 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
25774 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
25775 }
25776}
25777
25778/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25779///
25780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
25781#[inline]
25782#[target_feature(enable = "avx512f,avx512vl")]
25783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25784#[cfg_attr(
25785 test,
25786 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
25787)]
25788#[rustc_legacy_const_generics(1)]
25789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
    unsafe {
        // Only the low bit of the immediate selects a lane.
        static_assert_uimm_bits!(IMM8, 1);
        // Extract the low (0) or high (1) 128-bit lane; the second shuffle
        // operand is never read (two-input form of `simd_shuffle!` only).
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
        }
    }
}
25799
25800/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25801///
25802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
25803#[inline]
25804#[target_feature(enable = "avx512f,avx512vl")]
25805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25806#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25807#[rustc_legacy_const_generics(3)]
25808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25809pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
25810 src: __m128,
25811 k: __mmask8,
25812 a: __m256,
25813) -> __m128 {
25814 unsafe {
25815 static_assert_uimm_bits!(IMM8, 1);
25816 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
25817 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:src.as_f32x4()))
25818 }
25819}
25820
25821/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25822///
25823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
25824#[inline]
25825#[target_feature(enable = "avx512f,avx512vl")]
25826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25827#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25828#[rustc_legacy_const_generics(2)]
25829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25830pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
25831 unsafe {
25832 static_assert_uimm_bits!(IMM8, 1);
25833 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
25834 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x4(), no:f32x4::ZERO))
25835 }
25836}
25837
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
)]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
    unsafe {
        // IMM1 is validated to a single bit: 0 selects the low 256-bit half,
        // 1 the high half. The second shuffle operand (zero vector) is never
        // referenced by the index lists below; it only satisfies the macro's arity.
        static_assert_uimm_bits!(IMM1, 1);
        match IMM1 {
            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
        }
    }
}
25859
25860/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25866#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25869pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25870 src: __m256i,
25871 k: __mmask8,
25872 a: __m512i,
25873) -> __m256i {
25874 unsafe {
25875 static_assert_uimm_bits!(IMM1, 1);
25876 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
25877 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
25878 }
25879}
25880
25881/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25882///
25883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25884#[inline]
25885#[target_feature(enable = "avx512f")]
25886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25887#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25888#[rustc_legacy_const_generics(2)]
25889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25890pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25891 unsafe {
25892 static_assert_uimm_bits!(IMM1, 1);
25893 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
25894 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
25895 }
25896}
25897
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
    unsafe {
        // IMM8 bit 0 selects the low (0) or high (1) 256-bit half. The
        // undefined second operand is never referenced by the index lists.
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 0x1 {
            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
        }
    }
}
25916
25917/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25918///
25919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25920#[inline]
25921#[target_feature(enable = "avx512f")]
25922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25923#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25924#[rustc_legacy_const_generics(3)]
25925#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25926pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25927 src: __m256d,
25928 k: __mmask8,
25929 a: __m512d,
25930) -> __m256d {
25931 unsafe {
25932 static_assert_uimm_bits!(IMM8, 1);
25933 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
25934 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:src.as_f64x4()))
25935 }
25936}
25937
25938/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25939///
25940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25941#[inline]
25942#[target_feature(enable = "avx512f")]
25943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25944#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25945#[rustc_legacy_const_generics(2)]
25946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25947pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25948 unsafe {
25949 static_assert_uimm_bits!(IMM8, 1);
25950 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
25951 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x4(), no:f64x4::ZERO))
25952 }
25953}
25954
25955/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25958#[inline]
25959#[target_feature(enable = "avx512f")]
25960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25961#[cfg_attr(
25962 test,
25963 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25964)]
25965#[rustc_legacy_const_generics(1)]
25966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25967pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25968 unsafe {
25969 static_assert_uimm_bits!(IMM2, 2);
25970 let a: Simd = a.as_i32x16();
25971 let zero: Simd = i32x16::ZERO;
25972 let extract: i32x4 = match IMM2 {
25973 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25974 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25975 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25976 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25977 };
25978 transmute(src:extract)
25979 }
25980}
25981
25982/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25983///
25984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25985#[inline]
25986#[target_feature(enable = "avx512f")]
25987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25988#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25989#[rustc_legacy_const_generics(3)]
25990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25991pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25992 src: __m128i,
25993 k: __mmask8,
25994 a: __m512i,
25995) -> __m128i {
25996 unsafe {
25997 static_assert_uimm_bits!(IMM2, 2);
25998 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
25999 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
26000 }
26001}
26002
26003/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
26010#[rustc_legacy_const_generics(2)]
26011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26012pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
26013 unsafe {
26014 static_assert_uimm_bits!(IMM2, 2);
26015 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
26016 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
26017 }
26018}
26019
26020/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
26021///
26022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
26023#[inline]
26024#[target_feature(enable = "avx512f,avx512vl")]
26025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26026#[cfg_attr(
26027 test,
26028 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
26029)]
26030#[rustc_legacy_const_generics(1)]
26031#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26032pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
26033 unsafe {
26034 static_assert_uimm_bits!(IMM1, 1);
26035 let a: Simd = a.as_i32x8();
26036 let zero: Simd = i32x8::ZERO;
26037 let extract: i32x4 = match IMM1 {
26038 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
26039 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
26040 };
26041 transmute(src:extract)
26042 }
26043}
26044
26045/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26046///
26047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
26048#[inline]
26049#[target_feature(enable = "avx512f,avx512vl")]
26050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26051#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26052#[rustc_legacy_const_generics(3)]
26053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26054pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
26055 src: __m128i,
26056 k: __mmask8,
26057 a: __m256i,
26058) -> __m128i {
26059 unsafe {
26060 static_assert_uimm_bits!(IMM1, 1);
26061 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
26062 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
26063 }
26064}
26065
26066/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26067///
26068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
26069#[inline]
26070#[target_feature(enable = "avx512f,avx512vl")]
26071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26072#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26073#[rustc_legacy_const_generics(2)]
26074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26075pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
26076 unsafe {
26077 static_assert_uimm_bits!(IMM1, 1);
26078 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
26079 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
26080 }
26081}
26082
26083/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26084///
26085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
26086#[inline]
26087#[target_feature(enable = "avx512f")]
26088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26089#[cfg_attr(test, assert_instr(vmovsldup))]
26090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26091pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
26092 unsafe {
26093 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26094 transmute(src:r)
26095 }
26096}
26097
26098/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vmovsldup))]
26105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26106pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26107 unsafe {
26108 let mov: f32x16 =
26109 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26110 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
26111 }
26112}
26113
26114/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26115///
26116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
26117#[inline]
26118#[target_feature(enable = "avx512f")]
26119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26120#[cfg_attr(test, assert_instr(vmovsldup))]
26121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26122pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
26123 unsafe {
26124 let mov: f32x16 =
26125 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26126 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
26127 }
26128}
26129
26130/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26131///
26132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
26133#[inline]
26134#[target_feature(enable = "avx512f,avx512vl")]
26135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26136#[cfg_attr(test, assert_instr(vmovsldup))]
26137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26138pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26139 unsafe {
26140 let mov: __m256 = _mm256_moveldup_ps(a);
26141 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:src.as_f32x8()))
26142 }
26143}
26144
26145/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26146///
26147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
26148#[inline]
26149#[target_feature(enable = "avx512f,avx512vl")]
26150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26151#[cfg_attr(test, assert_instr(vmovsldup))]
26152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26153pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
26154 unsafe {
26155 let mov: __m256 = _mm256_moveldup_ps(a);
26156 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:f32x8::ZERO))
26157 }
26158}
26159
26160/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vmovsldup))]
26167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26168pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26169 unsafe {
26170 let mov: __m128 = _mm_moveldup_ps(a);
26171 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:src.as_f32x4()))
26172 }
26173}
26174
26175/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26176///
26177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
26178#[inline]
26179#[target_feature(enable = "avx512f,avx512vl")]
26180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26181#[cfg_attr(test, assert_instr(vmovsldup))]
26182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26183pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
26184 unsafe {
26185 let mov: __m128 = _mm_moveldup_ps(a);
26186 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:f32x4::ZERO))
26187 }
26188}
26189
26190/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26191///
26192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
26193#[inline]
26194#[target_feature(enable = "avx512f")]
26195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26196#[cfg_attr(test, assert_instr(vmovshdup))]
26197#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26198pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
26199 unsafe {
26200 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26201 transmute(src:r)
26202 }
26203}
26204
26205/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26206///
26207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
26208#[inline]
26209#[target_feature(enable = "avx512f")]
26210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26211#[cfg_attr(test, assert_instr(vmovshdup))]
26212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26213pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26214 unsafe {
26215 let mov: f32x16 =
26216 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26217 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f32x16()))
26218 }
26219}
26220
26221/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
26224#[inline]
26225#[target_feature(enable = "avx512f")]
26226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26227#[cfg_attr(test, assert_instr(vmovshdup))]
26228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26229pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
26230 unsafe {
26231 let mov: f32x16 =
26232 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26233 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f32x16::ZERO))
26234 }
26235}
26236
26237/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26238///
26239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
26240#[inline]
26241#[target_feature(enable = "avx512f,avx512vl")]
26242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26243#[cfg_attr(test, assert_instr(vmovshdup))]
26244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26245pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26246 unsafe {
26247 let mov: __m256 = _mm256_movehdup_ps(a);
26248 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:src.as_f32x8()))
26249 }
26250}
26251
26252/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26253///
26254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
26255#[inline]
26256#[target_feature(enable = "avx512f,avx512vl")]
26257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26258#[cfg_attr(test, assert_instr(vmovshdup))]
26259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26260pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
26261 unsafe {
26262 let mov: __m256 = _mm256_movehdup_ps(a);
26263 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x8(), no:f32x8::ZERO))
26264 }
26265}
26266
26267/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26268///
26269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
26270#[inline]
26271#[target_feature(enable = "avx512f,avx512vl")]
26272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26273#[cfg_attr(test, assert_instr(vmovshdup))]
26274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26275pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26276 unsafe {
26277 let mov: __m128 = _mm_movehdup_ps(a);
26278 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:src.as_f32x4()))
26279 }
26280}
26281
26282/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26283///
26284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
26285#[inline]
26286#[target_feature(enable = "avx512f,avx512vl")]
26287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26288#[cfg_attr(test, assert_instr(vmovshdup))]
26289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26290pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
26291 unsafe {
26292 let mov: __m128 = _mm_movehdup_ps(a);
26293 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f32x4(), no:f32x4::ZERO))
26294 }
26295}
26296
26297/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26303#[cfg_attr(test, assert_instr(vmovddup))]
26304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26305pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
26306 unsafe {
26307 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26308 transmute(src:r)
26309 }
26310}
26311
26312/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26313///
26314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
26315#[inline]
26316#[target_feature(enable = "avx512f")]
26317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26318#[cfg_attr(test, assert_instr(vmovddup))]
26319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26320pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
26321 unsafe {
26322 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26323 transmute(src:simd_select_bitmask(m:k, yes:mov, no:src.as_f64x8()))
26324 }
26325}
26326
26327/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26328///
26329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
26330#[inline]
26331#[target_feature(enable = "avx512f")]
26332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26333#[cfg_attr(test, assert_instr(vmovddup))]
26334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26335pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
26336 unsafe {
26337 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
26338 transmute(src:simd_select_bitmask(m:k, yes:mov, no:f64x8::ZERO))
26339 }
26340}
26341
26342/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26348#[cfg_attr(test, assert_instr(vmovddup))]
26349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26350pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
26351 unsafe {
26352 let mov: __m256d = _mm256_movedup_pd(a);
26353 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x4(), no:src.as_f64x4()))
26354 }
26355}
26356
26357/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26358///
26359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
26360#[inline]
26361#[target_feature(enable = "avx512f,avx512vl")]
26362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26363#[cfg_attr(test, assert_instr(vmovddup))]
26364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26365pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
26366 unsafe {
26367 let mov: __m256d = _mm256_movedup_pd(a);
26368 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x4(), no:f64x4::ZERO))
26369 }
26370}
26371
26372/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26373///
26374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
26375#[inline]
26376#[target_feature(enable = "avx512f,avx512vl")]
26377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26378#[cfg_attr(test, assert_instr(vmovddup))]
26379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26380pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
26381 unsafe {
26382 let mov: __m128d = _mm_movedup_pd(a);
26383 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x2(), no:src.as_f64x2()))
26384 }
26385}
26386
26387/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26388///
26389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
26390#[inline]
26391#[target_feature(enable = "avx512f,avx512vl")]
26392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26393#[cfg_attr(test, assert_instr(vmovddup))]
26394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26395pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
26396 unsafe {
26397 let mov: __m128d = _mm_movedup_pd(a);
26398 transmute(src:simd_select_bitmask(m:k, yes:mov.as_f64x2(), no:f64x2::ZERO))
26399 }
26400}
26401
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        // Widen `b` to 512 bits so its four elements are addressable as
        // shuffle indices 16..=19 (the upper bits are never selected).
        let b = _mm512_castsi128_si512(b).as_i32x16();
        // IMM8 selects which 128-bit lane of `a` is replaced by `b`.
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}
26445
26446/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26447///
26448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
26449#[inline]
26450#[target_feature(enable = "avx512f")]
26451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26452#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26453#[rustc_legacy_const_generics(4)]
26454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26455pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
26456 src: __m512i,
26457 k: __mmask16,
26458 a: __m512i,
26459 b: __m128i,
26460) -> __m512i {
26461 unsafe {
26462 static_assert_uimm_bits!(IMM8, 2);
26463 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
26464 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
26465 }
26466}
26467
26468/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26469///
26470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
26471#[inline]
26472#[target_feature(enable = "avx512f")]
26473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26474#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
26475#[rustc_legacy_const_generics(3)]
26476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26477pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
26478 k: __mmask16,
26479 a: __m512i,
26480 b: __m128i,
26481) -> __m512i {
26482 unsafe {
26483 static_assert_uimm_bits!(IMM8, 2);
26484 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
26485 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
26486 }
26487}
26488
26489/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
26490///
26491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
26492#[inline]
26493#[target_feature(enable = "avx512f,avx512vl")]
26494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26495#[cfg_attr(
26496 test,
26497 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
26498)]
26499#[rustc_legacy_const_generics(2)]
26500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26501pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
26502 unsafe {
26503 static_assert_uimm_bits!(IMM8, 1);
26504 let a: Simd = a.as_i32x8();
26505 let b: Simd = _mm256_castsi128_si256(b).as_i32x8();
26506 let ret: i32x8 = match IMM8 & 0b1 {
26507 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
26508 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
26509 };
26510 transmute(src:ret)
26511 }
26512}
26513
26514/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26515///
26516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
26517#[inline]
26518#[target_feature(enable = "avx512f,avx512vl")]
26519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26520#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26521#[rustc_legacy_const_generics(4)]
26522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26523pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
26524 src: __m256i,
26525 k: __mmask8,
26526 a: __m256i,
26527 b: __m128i,
26528) -> __m256i {
26529 unsafe {
26530 static_assert_uimm_bits!(IMM8, 1);
26531 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
26532 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
26533 }
26534}
26535
26536/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26537///
26538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
26539#[inline]
26540#[target_feature(enable = "avx512f,avx512vl")]
26541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26542#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
26543#[rustc_legacy_const_generics(3)]
26544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26545pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
26546 k: __mmask8,
26547 a: __m256i,
26548 b: __m128i,
26549) -> __m256i {
26550 unsafe {
26551 static_assert_uimm_bits!(IMM8, 1);
26552 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
26553 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
26554 }
26555}
26556
/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m512i = _mm512_castsi256_si512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26576
26577/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26578///
26579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
26580#[inline]
26581#[target_feature(enable = "avx512f")]
26582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26583#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26584#[rustc_legacy_const_generics(4)]
26585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26586pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
26587 src: __m512i,
26588 k: __mmask8,
26589 a: __m512i,
26590 b: __m256i,
26591) -> __m512i {
26592 unsafe {
26593 static_assert_uimm_bits!(IMM8, 1);
26594 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
26595 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
26596 }
26597}
26598
26599/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26600///
26601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
26602#[inline]
26603#[target_feature(enable = "avx512f")]
26604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26605#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
26606#[rustc_legacy_const_generics(3)]
26607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26608pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
26609 k: __mmask8,
26610 a: __m512i,
26611 b: __m256i,
26612) -> __m512i {
26613 unsafe {
26614 static_assert_uimm_bits!(IMM8, 1);
26615 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
26616 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
26617 }
26618}
26619
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // Widen `b` to 512 bits so both shuffle operands have 16 lanes;
        // shuffle indices 16..=19 then refer to the four elements of `b`.
        let b = _mm512_castps128_ps512(b);
        // IMM8 (2 bits) selects which 128-bit (4-element) chunk of `a` is
        // replaced by the contents of `b`.
        match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}
26661
26662/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26663///
26664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
26665#[inline]
26666#[target_feature(enable = "avx512f")]
26667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26668#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26669#[rustc_legacy_const_generics(4)]
26670#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26671pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
26672 src: __m512,
26673 k: __mmask16,
26674 a: __m512,
26675 b: __m128,
26676) -> __m512 {
26677 unsafe {
26678 static_assert_uimm_bits!(IMM8, 2);
26679 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
26680 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:src.as_f32x16()))
26681 }
26682}
26683
26684/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26685///
26686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
26687#[inline]
26688#[target_feature(enable = "avx512f")]
26689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26690#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
26691#[rustc_legacy_const_generics(3)]
26692#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26693pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
26694 k: __mmask16,
26695 a: __m512,
26696 b: __m128,
26697) -> __m512 {
26698 unsafe {
26699 static_assert_uimm_bits!(IMM8, 2);
26700 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
26701 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x16(), no:f32x16::ZERO))
26702 }
26703}
26704
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 256 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m256 = _mm256_castps128_ps256(b);
        // IMM8 bit 0 selects which 128-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26727
26728/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26729///
26730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
26731#[inline]
26732#[target_feature(enable = "avx512f,avx512vl")]
26733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26734#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26735#[rustc_legacy_const_generics(4)]
26736#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26737pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
26738 src: __m256,
26739 k: __mmask8,
26740 a: __m256,
26741 b: __m128,
26742) -> __m256 {
26743 unsafe {
26744 static_assert_uimm_bits!(IMM8, 1);
26745 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
26746 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:src.as_f32x8()))
26747 }
26748}
26749
26750/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26751///
26752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
26753#[inline]
26754#[target_feature(enable = "avx512f,avx512vl")]
26755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26756#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
26757#[rustc_legacy_const_generics(3)]
26758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26759pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
26760 k: __mmask8,
26761 a: __m256,
26762 b: __m128,
26763) -> __m256 {
26764 unsafe {
26765 static_assert_uimm_bits!(IMM8, 1);
26766 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
26767 transmute(src:simd_select_bitmask(m:k, yes:r.as_f32x8(), no:f32x8::ZERO))
26768 }
26769}
26770
/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 8 lanes;
        // shuffle indices 8..=11 then refer to the four elements of `b`.
        let b: __m512d = _mm512_castpd256_pd512(b);
        // IMM8 bit 0 selects which 256-bit half of `a` is replaced by `b`.
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
26790
26791/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26798#[rustc_legacy_const_generics(4)]
26799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26800pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
26801 src: __m512d,
26802 k: __mmask8,
26803 a: __m512d,
26804 b: __m256d,
26805) -> __m512d {
26806 unsafe {
26807 static_assert_uimm_bits!(IMM8, 1);
26808 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
26809 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:src.as_f64x8()))
26810 }
26811}
26812
26813/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26814///
26815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
26816#[inline]
26817#[target_feature(enable = "avx512f")]
26818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26819#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
26820#[rustc_legacy_const_generics(3)]
26821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26822pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
26823 k: __mmask8,
26824 a: __m512d,
26825 b: __m256d,
26826) -> __m512d {
26827 unsafe {
26828 static_assert_uimm_bits!(IMM8, 1);
26829 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
26830 transmute(src:simd_select_bitmask(m:k, yes:r.as_f64x8(), no:f64x8::ZERO))
26831 }
26832}
26833
26834/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26835///
26836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
26837#[inline]
26838#[target_feature(enable = "avx512f")]
26839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26840#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
26841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26842pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
26843 unsafe {
26844 let a: Simd = a.as_i32x16();
26845 let b: Simd = b.as_i32x16();
26846 #[rustfmt::skip]
26847 let r: i32x16 = simd_shuffle!(
26848 a, b,
26849 [ 2, 18, 3, 19,
26850 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26851 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26852 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26853 );
26854 transmute(src:r)
26855 }
26856}
26857
26858/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26859///
26860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
26861#[inline]
26862#[target_feature(enable = "avx512f")]
26863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26864#[cfg_attr(test, assert_instr(vpunpckhdq))]
26865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26866pub const fn _mm512_mask_unpackhi_epi32(
26867 src: __m512i,
26868 k: __mmask16,
26869 a: __m512i,
26870 b: __m512i,
26871) -> __m512i {
26872 unsafe {
26873 let unpackhi: Simd = _mm512_unpackhi_epi32(a, b).as_i32x16();
26874 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x16()))
26875 }
26876}
26877
26878/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26879///
26880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
26881#[inline]
26882#[target_feature(enable = "avx512f")]
26883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26884#[cfg_attr(test, assert_instr(vpunpckhdq))]
26885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26886pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26887 unsafe {
26888 let unpackhi: Simd = _mm512_unpackhi_epi32(a, b).as_i32x16();
26889 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x16::ZERO))
26890 }
26891}
26892
26893/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26894///
26895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
26896#[inline]
26897#[target_feature(enable = "avx512f,avx512vl")]
26898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26899#[cfg_attr(test, assert_instr(vpunpckhdq))]
26900#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26901pub const fn _mm256_mask_unpackhi_epi32(
26902 src: __m256i,
26903 k: __mmask8,
26904 a: __m256i,
26905 b: __m256i,
26906) -> __m256i {
26907 unsafe {
26908 let unpackhi: Simd = _mm256_unpackhi_epi32(a, b).as_i32x8();
26909 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x8()))
26910 }
26911}
26912
26913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26914///
26915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
26916#[inline]
26917#[target_feature(enable = "avx512f,avx512vl")]
26918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26919#[cfg_attr(test, assert_instr(vpunpckhdq))]
26920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26921pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26922 unsafe {
26923 let unpackhi: Simd = _mm256_unpackhi_epi32(a, b).as_i32x8();
26924 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x8::ZERO))
26925 }
26926}
26927
26928/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26929///
26930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
26931#[inline]
26932#[target_feature(enable = "avx512f,avx512vl")]
26933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26934#[cfg_attr(test, assert_instr(vpunpckhdq))]
26935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26936pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26937 unsafe {
26938 let unpackhi: Simd = _mm_unpackhi_epi32(a, b).as_i32x4();
26939 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i32x4()))
26940 }
26941}
26942
26943/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26944///
26945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
26946#[inline]
26947#[target_feature(enable = "avx512f,avx512vl")]
26948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26949#[cfg_attr(test, assert_instr(vpunpckhdq))]
26950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26951pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26952 unsafe {
26953 let unpackhi: Simd = _mm_unpackhi_epi32(a, b).as_i32x4();
26954 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i32x4::ZERO))
26955 }
26956}
26957
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    // Per 128-bit lane: high 64-bit element of `a` (odd index) followed by the
    // matching high element of `b` (index 8 + odd, since `b` follows `a`).
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
26969
26970/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26971///
26972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26973#[inline]
26974#[target_feature(enable = "avx512f")]
26975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26976#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26978pub const fn _mm512_mask_unpackhi_epi64(
26979 src: __m512i,
26980 k: __mmask8,
26981 a: __m512i,
26982 b: __m512i,
26983) -> __m512i {
26984 unsafe {
26985 let unpackhi: Simd = _mm512_unpackhi_epi64(a, b).as_i64x8();
26986 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x8()))
26987 }
26988}
26989
26990/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26991///
26992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26993#[inline]
26994#[target_feature(enable = "avx512f")]
26995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26996#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26997#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26998pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26999 unsafe {
27000 let unpackhi: Simd = _mm512_unpackhi_epi64(a, b).as_i64x8();
27001 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x8::ZERO))
27002 }
27003}
27004
27005/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27006///
27007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
27008#[inline]
27009#[target_feature(enable = "avx512f,avx512vl")]
27010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27011#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27013pub const fn _mm256_mask_unpackhi_epi64(
27014 src: __m256i,
27015 k: __mmask8,
27016 a: __m256i,
27017 b: __m256i,
27018) -> __m256i {
27019 unsafe {
27020 let unpackhi: Simd = _mm256_unpackhi_epi64(a, b).as_i64x4();
27021 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x4()))
27022 }
27023}
27024
27025/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27026///
27027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
27028#[inline]
27029#[target_feature(enable = "avx512f,avx512vl")]
27030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27031#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27032#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27033pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27034 unsafe {
27035 let unpackhi: Simd = _mm256_unpackhi_epi64(a, b).as_i64x4();
27036 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x4::ZERO))
27037 }
27038}
27039
27040/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27041///
27042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
27043#[inline]
27044#[target_feature(enable = "avx512f,avx512vl")]
27045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27046#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27047#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27048pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27049 unsafe {
27050 let unpackhi: Simd = _mm_unpackhi_epi64(a, b).as_i64x2();
27051 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_i64x2()))
27052 }
27053}
27054
27055/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27056///
27057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
27058#[inline]
27059#[target_feature(enable = "avx512f,avx512vl")]
27060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27061#[cfg_attr(test, assert_instr(vpunpckhqdq))]
27062#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27063pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27064 unsafe {
27065 let unpackhi: Simd = _mm_unpackhi_epi64(a, b).as_i64x2();
27066 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:i64x2::ZERO))
27067 }
27068}
27069
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        // Per 128-bit lane: high elements of `a` (indices 2, 3 + lane offset)
        // interleaved with high elements of `b` (indices 18, 19 + lane offset;
        // 16 is added because `b` follows `a` in the shuffle index space).
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
               2 + 4, 18 + 4, 3 + 4, 19 + 4,
               2 + 8, 18 + 8, 3 + 8, 19 + 8,
               2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}
27090
27091/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vunpckhps))]
27098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27099pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27100 unsafe {
27101 let unpackhi: Simd = _mm512_unpackhi_ps(a, b).as_f32x16();
27102 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x16()))
27103 }
27104}
27105
27106/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27107///
27108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
27109#[inline]
27110#[target_feature(enable = "avx512f")]
27111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27112#[cfg_attr(test, assert_instr(vunpckhps))]
27113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27114pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27115 unsafe {
27116 let unpackhi: Simd = _mm512_unpackhi_ps(a, b).as_f32x16();
27117 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x16::ZERO))
27118 }
27119}
27120
27121/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27122///
27123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
27124#[inline]
27125#[target_feature(enable = "avx512f,avx512vl")]
27126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27127#[cfg_attr(test, assert_instr(vunpckhps))]
27128#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27129pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27130 unsafe {
27131 let unpackhi: Simd = _mm256_unpackhi_ps(a, b).as_f32x8();
27132 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x8()))
27133 }
27134}
27135
27136/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27137///
27138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
27139#[inline]
27140#[target_feature(enable = "avx512f,avx512vl")]
27141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27142#[cfg_attr(test, assert_instr(vunpckhps))]
27143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27144pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27145 unsafe {
27146 let unpackhi: Simd = _mm256_unpackhi_ps(a, b).as_f32x8();
27147 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x8::ZERO))
27148 }
27149}
27150
27151/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27152///
27153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
27154#[inline]
27155#[target_feature(enable = "avx512f,avx512vl")]
27156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27157#[cfg_attr(test, assert_instr(vunpckhps))]
27158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27159pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27160 unsafe {
27161 let unpackhi: Simd = _mm_unpackhi_ps(a, b).as_f32x4();
27162 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f32x4()))
27163 }
27164}
27165
27166/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
27169#[inline]
27170#[target_feature(enable = "avx512f,avx512vl")]
27171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27172#[cfg_attr(test, assert_instr(vunpckhps))]
27173#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27174pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27175 unsafe {
27176 let unpackhi: Simd = _mm_unpackhi_ps(a, b).as_f32x4();
27177 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f32x4::ZERO))
27178 }
27179}
27180
27181/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
27182///
27183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
27184#[inline]
27185#[target_feature(enable = "avx512f")]
27186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27187#[cfg_attr(test, assert_instr(vunpckhpd))]
27188#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27189pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
27190 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
27191}
27192
27193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27194///
27195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
27196#[inline]
27197#[target_feature(enable = "avx512f")]
27198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27199#[cfg_attr(test, assert_instr(vunpckhpd))]
27200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27201pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27202 unsafe {
27203 let unpackhi: Simd = _mm512_unpackhi_pd(a, b).as_f64x8();
27204 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x8()))
27205 }
27206}
27207
27208/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27209///
27210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
27211#[inline]
27212#[target_feature(enable = "avx512f")]
27213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27214#[cfg_attr(test, assert_instr(vunpckhpd))]
27215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27216pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27217 unsafe {
27218 let unpackhi: Simd = _mm512_unpackhi_pd(a, b).as_f64x8();
27219 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x8::ZERO))
27220 }
27221}
27222
27223/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27224///
27225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
27226#[inline]
27227#[target_feature(enable = "avx512f,avx512vl")]
27228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27229#[cfg_attr(test, assert_instr(vunpckhpd))]
27230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27231pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27232 unsafe {
27233 let unpackhi: Simd = _mm256_unpackhi_pd(a, b).as_f64x4();
27234 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x4()))
27235 }
27236}
27237
27238/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27239///
27240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
27241#[inline]
27242#[target_feature(enable = "avx512f,avx512vl")]
27243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27244#[cfg_attr(test, assert_instr(vunpckhpd))]
27245#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27246pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27247 unsafe {
27248 let unpackhi: Simd = _mm256_unpackhi_pd(a, b).as_f64x4();
27249 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x4::ZERO))
27250 }
27251}
27252
27253/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vunpckhpd))]
27260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27261pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27262 unsafe {
27263 let unpackhi: Simd = _mm_unpackhi_pd(a, b).as_f64x2();
27264 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:src.as_f64x2()))
27265 }
27266}
27267
27268/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27269///
27270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
27271#[inline]
27272#[target_feature(enable = "avx512f,avx512vl")]
27273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27274#[cfg_attr(test, assert_instr(vunpckhpd))]
27275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27276pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27277 unsafe {
27278 let unpackhi: Simd = _mm_unpackhi_pd(a, b).as_f64x2();
27279 transmute(src:simd_select_bitmask(m:k, yes:unpackhi, no:f64x2::ZERO))
27280 }
27281}
27282
27283/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
27284///
27285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
27286#[inline]
27287#[target_feature(enable = "avx512f")]
27288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27289#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
27290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27291pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
27292 unsafe {
27293 let a: Simd = a.as_i32x16();
27294 let b: Simd = b.as_i32x16();
27295 #[rustfmt::skip]
27296 let r: i32x16 = simd_shuffle!(
27297 a, b,
27298 [ 0, 16, 1, 17,
27299 0 + 4, 16 + 4, 1 + 4, 17 + 4,
27300 0 + 8, 16 + 8, 1 + 8, 17 + 8,
27301 0 + 12, 16 + 12, 1 + 12, 17 + 12],
27302 );
27303 transmute(src:r)
27304 }
27305}
27306
27307/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27308///
27309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
27310#[inline]
27311#[target_feature(enable = "avx512f")]
27312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27313#[cfg_attr(test, assert_instr(vpunpckldq))]
27314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27315pub const fn _mm512_mask_unpacklo_epi32(
27316 src: __m512i,
27317 k: __mmask16,
27318 a: __m512i,
27319 b: __m512i,
27320) -> __m512i {
27321 unsafe {
27322 let unpacklo: Simd = _mm512_unpacklo_epi32(a, b).as_i32x16();
27323 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x16()))
27324 }
27325}
27326
27327/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27328///
27329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
27330#[inline]
27331#[target_feature(enable = "avx512f")]
27332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27333#[cfg_attr(test, assert_instr(vpunpckldq))]
27334#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27335pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27336 unsafe {
27337 let unpacklo: Simd = _mm512_unpacklo_epi32(a, b).as_i32x16();
27338 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x16::ZERO))
27339 }
27340}
27341
27342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
27345#[inline]
27346#[target_feature(enable = "avx512f,avx512vl")]
27347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27348#[cfg_attr(test, assert_instr(vpunpckldq))]
27349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27350pub const fn _mm256_mask_unpacklo_epi32(
27351 src: __m256i,
27352 k: __mmask8,
27353 a: __m256i,
27354 b: __m256i,
27355) -> __m256i {
27356 unsafe {
27357 let unpacklo: Simd = _mm256_unpacklo_epi32(a, b).as_i32x8();
27358 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x8()))
27359 }
27360}
27361
27362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
27365#[inline]
27366#[target_feature(enable = "avx512f,avx512vl")]
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368#[cfg_attr(test, assert_instr(vpunpckldq))]
27369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27370pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27371 unsafe {
27372 let unpacklo: Simd = _mm256_unpacklo_epi32(a, b).as_i32x8();
27373 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x8::ZERO))
27374 }
27375}
27376
27377/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27378///
27379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
27380#[inline]
27381#[target_feature(enable = "avx512f,avx512vl")]
27382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27383#[cfg_attr(test, assert_instr(vpunpckldq))]
27384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27385pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27386 unsafe {
27387 let unpacklo: Simd = _mm_unpacklo_epi32(a, b).as_i32x4();
27388 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i32x4()))
27389 }
27390}
27391
27392/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27393///
27394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
27395#[inline]
27396#[target_feature(enable = "avx512f,avx512vl")]
27397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27398#[cfg_attr(test, assert_instr(vpunpckldq))]
27399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27400pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27401 unsafe {
27402 let unpacklo: Simd = _mm_unpacklo_epi32(a, b).as_i32x4();
27403 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i32x4::ZERO))
27404 }
27405}
27406
27407/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
27408///
27409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
27410#[inline]
27411#[target_feature(enable = "avx512f")]
27412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27413#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
27414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27415pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
27416 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27417}
27418
27419/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27420///
27421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
27422#[inline]
27423#[target_feature(enable = "avx512f")]
27424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27425#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27426#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27427pub const fn _mm512_mask_unpacklo_epi64(
27428 src: __m512i,
27429 k: __mmask8,
27430 a: __m512i,
27431 b: __m512i,
27432) -> __m512i {
27433 unsafe {
27434 let unpacklo: Simd = _mm512_unpacklo_epi64(a, b).as_i64x8();
27435 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x8()))
27436 }
27437}
27438
27439/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27440///
27441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
27442#[inline]
27443#[target_feature(enable = "avx512f")]
27444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27445#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27447pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27448 unsafe {
27449 let unpacklo: Simd = _mm512_unpacklo_epi64(a, b).as_i64x8();
27450 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x8::ZERO))
27451 }
27452}
27453
27454/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
27457#[inline]
27458#[target_feature(enable = "avx512f,avx512vl")]
27459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27460#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27462pub const fn _mm256_mask_unpacklo_epi64(
27463 src: __m256i,
27464 k: __mmask8,
27465 a: __m256i,
27466 b: __m256i,
27467) -> __m256i {
27468 unsafe {
27469 let unpacklo: Simd = _mm256_unpacklo_epi64(a, b).as_i64x4();
27470 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x4()))
27471 }
27472}
27473
27474/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
27477#[inline]
27478#[target_feature(enable = "avx512f,avx512vl")]
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27482pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27483 unsafe {
27484 let unpacklo: Simd = _mm256_unpacklo_epi64(a, b).as_i64x4();
27485 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x4::ZERO))
27486 }
27487}
27488
27489/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27490///
27491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
27492#[inline]
27493#[target_feature(enable = "avx512f,avx512vl")]
27494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27495#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27496#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27497pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27498 unsafe {
27499 let unpacklo: Simd = _mm_unpacklo_epi64(a, b).as_i64x2();
27500 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_i64x2()))
27501 }
27502}
27503
27504/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27505///
27506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
27507#[inline]
27508#[target_feature(enable = "avx512f,avx512vl")]
27509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27510#[cfg_attr(test, assert_instr(vpunpcklqdq))]
27511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27512pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27513 unsafe {
27514 let unpacklo: Simd = _mm_unpacklo_epi64(a, b).as_i64x2();
27515 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:i64x2::ZERO))
27516 }
27517}
27518
27519/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27520///
27521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
27522#[inline]
27523#[target_feature(enable = "avx512f")]
27524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27525#[cfg_attr(test, assert_instr(vunpcklps))]
27526#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27527pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
27528 unsafe {
27529 #[rustfmt::skip]
27530 simd_shuffle!(a, b,
27531 [ 0, 16, 1, 17,
27532 0 + 4, 16 + 4, 1 + 4, 17 + 4,
27533 0 + 8, 16 + 8, 1 + 8, 17 + 8,
27534 0 + 12, 16 + 12, 1 + 12, 17 + 12],
27535 )
27536 }
27537}
27538
27539/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
27542#[inline]
27543#[target_feature(enable = "avx512f")]
27544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27545#[cfg_attr(test, assert_instr(vunpcklps))]
27546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27547pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
27548 unsafe {
27549 let unpacklo: Simd = _mm512_unpacklo_ps(a, b).as_f32x16();
27550 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x16()))
27551 }
27552}
27553
27554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27555///
27556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
27557#[inline]
27558#[target_feature(enable = "avx512f")]
27559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27560#[cfg_attr(test, assert_instr(vunpcklps))]
27561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27562pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27563 unsafe {
27564 let unpacklo: Simd = _mm512_unpacklo_ps(a, b).as_f32x16();
27565 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x16::ZERO))
27566 }
27567}
27568
27569/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27570///
27571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
27572#[inline]
27573#[target_feature(enable = "avx512f,avx512vl")]
27574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27575#[cfg_attr(test, assert_instr(vunpcklps))]
27576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27577pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
27578 unsafe {
27579 let unpacklo: Simd = _mm256_unpacklo_ps(a, b).as_f32x8();
27580 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x8()))
27581 }
27582}
27583
27584/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27585///
27586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
27587#[inline]
27588#[target_feature(enable = "avx512f,avx512vl")]
27589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27590#[cfg_attr(test, assert_instr(vunpcklps))]
27591#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27592pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27593 unsafe {
27594 let unpacklo: Simd = _mm256_unpacklo_ps(a, b).as_f32x8();
27595 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x8::ZERO))
27596 }
27597}
27598
27599/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27600///
27601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
27602#[inline]
27603#[target_feature(enable = "avx512f,avx512vl")]
27604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27605#[cfg_attr(test, assert_instr(vunpcklps))]
27606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27607pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
27608 unsafe {
27609 let unpacklo: Simd = _mm_unpacklo_ps(a, b).as_f32x4();
27610 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f32x4()))
27611 }
27612}
27613
27614/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27615///
27616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
27617#[inline]
27618#[target_feature(enable = "avx512f,avx512vl")]
27619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27620#[cfg_attr(test, assert_instr(vunpcklps))]
27621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27622pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27623 unsafe {
27624 let unpacklo: Simd = _mm_unpacklo_ps(a, b).as_f32x4();
27625 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f32x4::ZERO))
27626 }
27627}
27628
27629/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
27632#[inline]
27633#[target_feature(enable = "avx512f")]
27634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27635#[cfg_attr(test, assert_instr(vunpcklpd))]
27636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27637pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
27638 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
27639}
27640
27641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27642///
27643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
27644#[inline]
27645#[target_feature(enable = "avx512f")]
27646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27647#[cfg_attr(test, assert_instr(vunpcklpd))]
27648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27649pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27650 unsafe {
27651 let unpacklo: Simd = _mm512_unpacklo_pd(a, b).as_f64x8();
27652 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x8()))
27653 }
27654}
27655
27656/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27657///
27658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
27659#[inline]
27660#[target_feature(enable = "avx512f")]
27661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27662#[cfg_attr(test, assert_instr(vunpcklpd))]
27663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27664pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27665 unsafe {
27666 let unpacklo: Simd = _mm512_unpacklo_pd(a, b).as_f64x8();
27667 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x8::ZERO))
27668 }
27669}
27670
27671/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
27674#[inline]
27675#[target_feature(enable = "avx512f,avx512vl")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vunpcklpd))]
27678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27679pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27680 unsafe {
27681 let unpacklo: Simd = _mm256_unpacklo_pd(a, b).as_f64x4();
27682 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x4()))
27683 }
27684}
27685
27686/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
27689#[inline]
27690#[target_feature(enable = "avx512f,avx512vl")]
27691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27692#[cfg_attr(test, assert_instr(vunpcklpd))]
27693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27694pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27695 unsafe {
27696 let unpacklo: Simd = _mm256_unpacklo_pd(a, b).as_f64x4();
27697 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x4::ZERO))
27698 }
27699}
27700
27701/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27702///
27703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
27704#[inline]
27705#[target_feature(enable = "avx512f,avx512vl")]
27706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27707#[cfg_attr(test, assert_instr(vunpcklpd))]
27708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27709pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27710 unsafe {
27711 let unpacklo: Simd = _mm_unpacklo_pd(a, b).as_f64x2();
27712 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:src.as_f64x2()))
27713 }
27714}
27715
27716/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27717///
27718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
27719#[inline]
27720#[target_feature(enable = "avx512f,avx512vl")]
27721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27722#[cfg_attr(test, assert_instr(vunpcklpd))]
27723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27724pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27725 unsafe {
27726 let unpacklo: Simd = _mm_unpacklo_pd(a, b).as_f64x2();
27727 transmute(src:simd_select_bitmask(m:k, yes:unpacklo, no:f64x2::ZERO))
27728 }
27729}
27730
/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        // Indices 0..=3 keep the four lanes of `a`; the repeated index 4 reads
        // lane 0 of the undefined second operand, filling the upper 12 lanes
        // with unspecified-but-valid values.
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}
27753
/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        // Indices 0..=7 keep the eight lanes of `a`; the repeated index 8 reads
        // lane 0 of the undefined second operand for the upper 8 lanes.
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}
27776
27777/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27778///
27779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
27780#[inline]
27781#[target_feature(enable = "avx512f")]
27782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27783#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27784pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
27785 unsafe {
27786 simd_shuffle!(
27787 a,
27788 _mm_set1_ps(0.),
27789 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
27790 )
27791 }
27792}
27793
27794/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27795///
27796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
27797#[inline]
27798#[target_feature(enable = "avx512f")]
27799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27801pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
27802 unsafe {
27803 simd_shuffle!(
27804 a,
27805 _mm256_set1_ps(0.),
27806 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
27807 )
27808 }
27809}
27810
/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    // Truncating shuffle: keep only the low 4 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27821
/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    // Truncating shuffle: keep only the low 8 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}
27832
27833/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27834///
27835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
27836#[inline]
27837#[target_feature(enable = "avx512f")]
27838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27840pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
27841 unsafe { transmute(src:a) }
27842}
27843
27844/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27845///
27846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
27847#[inline]
27848#[target_feature(enable = "avx512f")]
27849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27851pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
27852 unsafe { transmute(src:a) }
27853}
27854
/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    // Indices 0..=1 keep `a`; the repeated index 2 reads lane 0 of the
    // undefined second operand for the upper 6 lanes.
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27871
/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    // Indices 0..=3 keep `a`; the repeated index 4 reads lane 0 of the
    // undefined second operand for the upper 4 lanes.
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27888
27889/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27890///
27891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
27892#[inline]
27893#[target_feature(enable = "avx512f")]
27894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27896pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
27897 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
27898}
27899
27900/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27901///
27902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
27903#[inline]
27904#[target_feature(enable = "avx512f")]
27905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27907pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
27908 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
27909}
27910
/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    // Truncating shuffle: keep only the low 2 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
27921
/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    // Truncating shuffle: keep only the low 4 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
27932
27933/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27934///
27935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
27936#[inline]
27937#[target_feature(enable = "avx512f")]
27938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27939#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27940pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
27941 unsafe { transmute(src:a) }
27942}
27943
27944/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
27945///
27946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
27947#[inline]
27948#[target_feature(enable = "avx512f")]
27949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27950#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
27951pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
27952 unsafe { transmute(src:a) }
27953}
27954
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    // Shuffled as i64 lanes: indices 0..=1 keep `a`; the repeated index 2
    // reads lane 0 of the undefined second operand for the upper 6 lanes.
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27971
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    // Shuffled as i64 lanes: indices 0..=3 keep `a`; the repeated index 4
    // reads lane 0 of the undefined second operand for the upper 4 lanes.
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
27988
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    // Concatenate `a` with an all-zero vector; the repeated index 2 reads
    // lane 0 of the zero operand, clearing the upper 384 bits.
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}
27999
/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    // Concatenate `a` with an all-zero vector; the repeated index 4 reads
    // lane 0 of the zero operand, clearing the upper 256 bits.
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}
28010
/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    // Truncating shuffle: keep only the low 2 i64 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}
28021
/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    // Truncating shuffle: keep only the low 4 i64 lanes of `a`.
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}
28032
28033/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
28034///
28035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
28036#[inline]
28037#[target_feature(enable = "avx512f")]
28038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28039#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28040pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
28041 unsafe { transmute(src:a) }
28042}
28043
28044/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
28045///
28046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
28047#[inline]
28048#[target_feature(enable = "avx512f")]
28049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28051pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
28052 unsafe { transmute(src:a) }
28053}
28054
/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    // Extract element 0 of the i32x16 view of `a`.
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}
28066
/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
    // Extract lane 0 of `a`.
    unsafe { simd_extract!(a, 0) }
}
28077
/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    // Extract lane 0 of `a`.
    unsafe { simd_extract!(a, 0) }
}
28088
28089/// Broadcast the low packed 32-bit integer from a to all elements of dst.
28090///
28091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
28092#[inline]
28093#[target_feature(enable = "avx512f")]
28094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28095#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
28096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28097pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
28098 unsafe {
28099 let a: Simd = _mm512_castsi128_si512(a).as_i32x16();
28100 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
28101 transmute(src:ret)
28102 }
28103}
28104
28105/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28106///
28107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
28108#[inline]
28109#[target_feature(enable = "avx512f")]
28110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28111#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28112#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28113pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28114 unsafe {
28115 let broadcast: Simd = _mm512_broadcastd_epi32(a).as_i32x16();
28116 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x16()))
28117 }
28118}
28119
28120/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28121///
28122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
28123#[inline]
28124#[target_feature(enable = "avx512f")]
28125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28126#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28128pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
28129 unsafe {
28130 let broadcast: Simd = _mm512_broadcastd_epi32(a).as_i32x16();
28131 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x16::ZERO))
28132 }
28133}
28134
28135/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
28138#[inline]
28139#[target_feature(enable = "avx512f,avx512vl")]
28140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28141#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28143pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28144 unsafe {
28145 let broadcast: Simd = _mm256_broadcastd_epi32(a).as_i32x8();
28146 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x8()))
28147 }
28148}
28149
28150/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28151///
28152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
28153#[inline]
28154#[target_feature(enable = "avx512f,avx512vl")]
28155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28156#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28157#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28158pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
28159 unsafe {
28160 let broadcast: Simd = _mm256_broadcastd_epi32(a).as_i32x8();
28161 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x8::ZERO))
28162 }
28163}
28164
28165/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28166///
28167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
28168#[inline]
28169#[target_feature(enable = "avx512f,avx512vl")]
28170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28171#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28172#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28173pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
28174 unsafe {
28175 let broadcast: Simd = _mm_broadcastd_epi32(a).as_i32x4();
28176 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x4()))
28177 }
28178}
28179
28180/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28181///
28182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
28183#[inline]
28184#[target_feature(enable = "avx512f,avx512vl")]
28185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28186#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
28187#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28188pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
28189 unsafe {
28190 let broadcast: Simd = _mm_broadcastd_epi32(a).as_i32x4();
28191 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x4::ZERO))
28192 }
28193}
28194
/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    // Splat lane 0 of `a` across all 8 i64 lanes of the 512-bit result.
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}
28206
28207/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28208///
28209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
28210#[inline]
28211#[target_feature(enable = "avx512f")]
28212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28213#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28215pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
28216 unsafe {
28217 let broadcast: Simd = _mm512_broadcastq_epi64(a).as_i64x8();
28218 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x8()))
28219 }
28220}
28221
28222/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28223///
28224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
28225#[inline]
28226#[target_feature(enable = "avx512f")]
28227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28228#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28229#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28230pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
28231 unsafe {
28232 let broadcast: Simd = _mm512_broadcastq_epi64(a).as_i64x8();
28233 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x8::ZERO))
28234 }
28235}
28236
28237/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28238///
28239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
28240#[inline]
28241#[target_feature(enable = "avx512f,avx512vl")]
28242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28243#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28245pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28246 unsafe {
28247 let broadcast: Simd = _mm256_broadcastq_epi64(a).as_i64x4();
28248 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x4()))
28249 }
28250}
28251
28252/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28253///
28254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
28255#[inline]
28256#[target_feature(enable = "avx512f,avx512vl")]
28257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28258#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28260pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
28261 unsafe {
28262 let broadcast: Simd = _mm256_broadcastq_epi64(a).as_i64x4();
28263 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x4::ZERO))
28264 }
28265}
28266
28267/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28268///
28269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
28270#[inline]
28271#[target_feature(enable = "avx512f,avx512vl")]
28272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28273#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28275pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
28276 unsafe {
28277 let broadcast: Simd = _mm_broadcastq_epi64(a).as_i64x2();
28278 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x2()))
28279 }
28280}
28281
28282/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28283///
28284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
28285#[inline]
28286#[target_feature(enable = "avx512f,avx512vl")]
28287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28288#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
28289#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28290pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
28291 unsafe {
28292 let broadcast: Simd = _mm_broadcastq_epi64(a).as_i64x2();
28293 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x2::ZERO))
28294 }
28295}
28296
28297/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
28298///
28299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
28300#[inline]
28301#[target_feature(enable = "avx512f")]
28302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28303#[cfg_attr(test, assert_instr(vbroadcastss))]
28304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28305pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
28306 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
28307}
28308
28309/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28310///
28311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
28312#[inline]
28313#[target_feature(enable = "avx512f")]
28314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28315#[cfg_attr(test, assert_instr(vbroadcastss))]
28316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28317pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28318 unsafe {
28319 let broadcast: Simd = _mm512_broadcastss_ps(a).as_f32x16();
28320 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x16()))
28321 }
28322}
28323
28324/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28325///
28326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
28327#[inline]
28328#[target_feature(enable = "avx512f")]
28329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28330#[cfg_attr(test, assert_instr(vbroadcastss))]
28331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28332pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
28333 unsafe {
28334 let broadcast: Simd = _mm512_broadcastss_ps(a).as_f32x16();
28335 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x16::ZERO))
28336 }
28337}
28338
28339/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28340///
28341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
28342#[inline]
28343#[target_feature(enable = "avx512f,avx512vl")]
28344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28345#[cfg_attr(test, assert_instr(vbroadcastss))]
28346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28347pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28348 unsafe {
28349 let broadcast: Simd = _mm256_broadcastss_ps(a).as_f32x8();
28350 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x8()))
28351 }
28352}
28353
28354/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28355///
28356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
28357#[inline]
28358#[target_feature(enable = "avx512f,avx512vl")]
28359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28360#[cfg_attr(test, assert_instr(vbroadcastss))]
28361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28362pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
28363 unsafe {
28364 let broadcast: Simd = _mm256_broadcastss_ps(a).as_f32x8();
28365 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x8::ZERO))
28366 }
28367}
28368
28369/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28370///
28371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
28372#[inline]
28373#[target_feature(enable = "avx512f,avx512vl")]
28374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28375#[cfg_attr(test, assert_instr(vbroadcastss))]
28376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28377pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
28378 unsafe {
28379 let broadcast: Simd = _mm_broadcastss_ps(a).as_f32x4();
28380 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x4()))
28381 }
28382}
28383
28384/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28390#[cfg_attr(test, assert_instr(vbroadcastss))]
28391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28392pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
28393 unsafe {
28394 let broadcast: Simd = _mm_broadcastss_ps(a).as_f32x4();
28395 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x4::ZERO))
28396 }
28397}
28398
28399/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
28400///
28401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
28402#[inline]
28403#[target_feature(enable = "avx512f")]
28404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28405#[cfg_attr(test, assert_instr(vbroadcastsd))]
28406#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28407pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
28408 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
28409}
28410
28411/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28412///
28413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
28414#[inline]
28415#[target_feature(enable = "avx512f")]
28416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28417#[cfg_attr(test, assert_instr(vbroadcastsd))]
28418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28419pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
28420 unsafe {
28421 let broadcast: Simd = _mm512_broadcastsd_pd(a).as_f64x8();
28422 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x8()))
28423 }
28424}
28425
28426/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28427///
28428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
28429#[inline]
28430#[target_feature(enable = "avx512f")]
28431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28432#[cfg_attr(test, assert_instr(vbroadcastsd))]
28433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28434pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
28435 unsafe {
28436 let broadcast: Simd = _mm512_broadcastsd_pd(a).as_f64x8();
28437 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x8::ZERO))
28438 }
28439}
28440
28441/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28442///
28443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
28444#[inline]
28445#[target_feature(enable = "avx512f,avx512vl")]
28446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28447#[cfg_attr(test, assert_instr(vbroadcastsd))]
28448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28449pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
28450 unsafe {
28451 let broadcast: Simd = _mm256_broadcastsd_pd(a).as_f64x4();
28452 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x4()))
28453 }
28454}
28455
28456/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28457///
28458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
28459#[inline]
28460#[target_feature(enable = "avx512f,avx512vl")]
28461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28462#[cfg_attr(test, assert_instr(vbroadcastsd))]
28463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28464pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
28465 unsafe {
28466 let broadcast: Simd = _mm256_broadcastsd_pd(a).as_f64x4();
28467 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x4::ZERO))
28468 }
28469}
28470
28471/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
28472///
28473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
28474#[inline]
28475#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28478pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
28479 unsafe {
28480 let a: Simd = a.as_i32x4();
28481 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
28482 transmute(src:ret)
28483 }
28484}
28485
28486/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28487///
28488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
28489#[inline]
28490#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28493pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
28494 unsafe {
28495 let broadcast: Simd = _mm512_broadcast_i32x4(a).as_i32x16();
28496 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x16()))
28497 }
28498}
28499
28500/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28501///
28502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
28503#[inline]
28504#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
28505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28507pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
28508 unsafe {
28509 let broadcast: Simd = _mm512_broadcast_i32x4(a).as_i32x16();
28510 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x16::ZERO))
28511 }
28512}
28513
28514/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
28515///
28516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
28517#[inline]
28518#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28521pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
28522 unsafe {
28523 let a: Simd = a.as_i32x4();
28524 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
28525 transmute(src:ret)
28526 }
28527}
28528
28529/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28530///
28531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
28532#[inline]
28533#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28536pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
28537 unsafe {
28538 let broadcast: Simd = _mm256_broadcast_i32x4(a).as_i32x8();
28539 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i32x8()))
28540 }
28541}
28542
28543/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28544///
28545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
28546#[inline]
28547#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
28548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28550pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
28551 unsafe {
28552 let broadcast: Simd = _mm256_broadcast_i32x4(a).as_i32x8();
28553 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i32x8::ZERO))
28554 }
28555}
28556
28557/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
28558///
28559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
28560#[inline]
28561#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28564pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
28565 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28566}
28567
28568/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28569///
28570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
28571#[inline]
28572#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28575pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
28576 unsafe {
28577 let broadcast: Simd = _mm512_broadcast_i64x4(a).as_i64x8();
28578 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_i64x8()))
28579 }
28580}
28581
28582/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28583///
28584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
28585#[inline]
28586#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
28587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28589pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
28590 unsafe {
28591 let broadcast: Simd = _mm512_broadcast_i64x4(a).as_i64x8();
28592 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:i64x8::ZERO))
28593 }
28594}
28595
28596/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28597///
28598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
28599#[inline]
28600#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
28601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28602#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28603pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
28604 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
28605}
28606
28607/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28608///
28609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
28610#[inline]
28611#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28614pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28615 unsafe {
28616 let broadcast: Simd = _mm512_broadcast_f32x4(a).as_f32x16();
28617 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x16()))
28618 }
28619}
28620
28621/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28622///
28623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
28624#[inline]
28625#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
28626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28627#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28628pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
28629 unsafe {
28630 let broadcast: Simd = _mm512_broadcast_f32x4(a).as_f32x16();
28631 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x16::ZERO))
28632 }
28633}
28634
28635/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
28636///
28637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
28638#[inline]
28639#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
28640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28642pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
28643 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28644}
28645
28646/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28647///
28648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
28649#[inline]
28650#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28653pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
28654 unsafe {
28655 let broadcast: Simd = _mm256_broadcast_f32x4(a).as_f32x8();
28656 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f32x8()))
28657 }
28658}
28659
28660/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28661///
28662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
28663#[inline]
28664#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
28665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28667pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
28668 unsafe {
28669 let broadcast: Simd = _mm256_broadcast_f32x4(a).as_f32x8();
28670 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f32x8::ZERO))
28671 }
28672}
28673
28674/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
28675///
28676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
28677#[inline]
28678#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
28679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28681pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
28682 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
28683}
28684
28685/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28686///
28687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
28688#[inline]
28689#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28692pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
28693 unsafe {
28694 let broadcast: Simd = _mm512_broadcast_f64x4(a).as_f64x8();
28695 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:src.as_f64x8()))
28696 }
28697}
28698
28699/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28700///
28701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
28702#[inline]
28703#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
28704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28706pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
28707 unsafe {
28708 let broadcast: Simd = _mm512_broadcast_f64x4(a).as_f64x8();
28709 transmute(src:simd_select_bitmask(m:k, yes:broadcast, no:f64x8::ZERO))
28710 }
28711}
28712
28713/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28714///
28715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
28716#[inline]
28717#[target_feature(enable = "avx512f")]
28718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28719#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28721pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28722 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x16(), no:a.as_i32x16())) }
28723}
28724
28725/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28726///
28727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
28728#[inline]
28729#[target_feature(enable = "avx512f,avx512vl")]
28730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28731#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28733pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28734 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x8(), no:a.as_i32x8())) }
28735}
28736
28737/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28743#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
28744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28745pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28746 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i32x4(), no:a.as_i32x4())) }
28747}
28748
28749/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28750///
28751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
28752#[inline]
28753#[target_feature(enable = "avx512f")]
28754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28755#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28757pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28758 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x8(), no:a.as_i64x8())) }
28759}
28760
28761/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28762///
28763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
28764#[inline]
28765#[target_feature(enable = "avx512f,avx512vl")]
28766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28767#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28769pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28770 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x4(), no:a.as_i64x4())) }
28771}
28772
28773/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
28774///
28775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
28776#[inline]
28777#[target_feature(enable = "avx512f,avx512vl")]
28778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28779#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
28780#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28781pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28782 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_i64x2(), no:a.as_i64x2())) }
28783}
28784
28785/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
28788#[inline]
28789#[target_feature(enable = "avx512f")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28793pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
28794 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x16(), no:a.as_f32x16())) }
28795}
28796
28797/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28798///
28799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
28800#[inline]
28801#[target_feature(enable = "avx512f,avx512vl")]
28802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28803#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28805pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
28806 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x8(), no:a.as_f32x8())) }
28807}
28808
28809/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28810///
28811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
28812#[inline]
28813#[target_feature(enable = "avx512f,avx512vl")]
28814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28815#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
28816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28817pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
28818 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f32x4(), no:a.as_f32x4())) }
28819}
28820
28821/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28822///
28823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
28824#[inline]
28825#[target_feature(enable = "avx512f")]
28826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28827#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28829pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
28830 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x8(), no:a.as_f64x8())) }
28831}
28832
28833/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28834///
28835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
28836#[inline]
28837#[target_feature(enable = "avx512f,avx512vl")]
28838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28839#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28841pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
28842 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x4(), no:a.as_f64x4())) }
28843}
28844
28845/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
28846///
28847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
28848#[inline]
28849#[target_feature(enable = "avx512f,avx512vl")]
28850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28851#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
28852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28853pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
28854 unsafe { transmute(src:simd_select_bitmask(m:k, yes:b.as_f64x2(), no:a.as_f64x2())) }
28855}
28856
28857/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
28858///
28859/// <div class="warning">Only lowest <strong>4 bits</strong> are used from the mask (shift at maximum by 60 bytes)!</div>
28860///
28861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
28862#[inline]
28863#[target_feature(enable = "avx512f")]
28864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28865#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28866#[rustc_legacy_const_generics(2)]
28867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28868pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
28869 unsafe {
28870 static_assert_uimm_bits!(IMM8, 8);
28871 let a = a.as_i32x16();
28872 let b = b.as_i32x16();
28873 let imm8: i32 = IMM8 % 16;
28874 let r: i32x16 = match imm8 {
28875 0 => simd_shuffle!(
28876 a,
28877 b,
28878 [
28879 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
28880 ],
28881 ),
28882 1 => simd_shuffle!(
28883 a,
28884 b,
28885 [
28886 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
28887 ],
28888 ),
28889 2 => simd_shuffle!(
28890 a,
28891 b,
28892 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
28893 ),
28894 3 => simd_shuffle!(
28895 a,
28896 b,
28897 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
28898 ),
28899 4 => simd_shuffle!(
28900 a,
28901 b,
28902 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
28903 ),
28904 5 => simd_shuffle!(
28905 a,
28906 b,
28907 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
28908 ),
28909 6 => simd_shuffle!(
28910 a,
28911 b,
28912 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
28913 ),
28914 7 => simd_shuffle!(
28915 a,
28916 b,
28917 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
28918 ),
28919 8 => simd_shuffle!(
28920 a,
28921 b,
28922 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
28923 ),
28924 9 => simd_shuffle!(
28925 a,
28926 b,
28927 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
28928 ),
28929 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
28930 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
28931 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
28932 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
28933 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
28934 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
28935 _ => unreachable_unchecked(),
28936 };
28937 transmute(r)
28938 }
28939}
28940
28941/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28942///
28943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
28944#[inline]
28945#[target_feature(enable = "avx512f")]
28946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28947#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28948#[rustc_legacy_const_generics(4)]
28949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28950pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
28951 src: __m512i,
28952 k: __mmask16,
28953 a: __m512i,
28954 b: __m512i,
28955) -> __m512i {
28956 unsafe {
28957 static_assert_uimm_bits!(IMM8, 8);
28958 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
28959 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:src.as_i32x16()))
28960 }
28961}
28962
28963/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28964///
28965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
28966#[inline]
28967#[target_feature(enable = "avx512f")]
28968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28969#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28970#[rustc_legacy_const_generics(3)]
28971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28972pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
28973 k: __mmask16,
28974 a: __m512i,
28975 b: __m512i,
28976) -> __m512i {
28977 unsafe {
28978 static_assert_uimm_bits!(IMM8, 8);
28979 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
28980 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x16(), no:i32x16::ZERO))
28981 }
28982}
28983
28984/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
28985///
28986/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 28 bytes)!</div>
28987///
28988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
28989#[inline]
28990#[target_feature(enable = "avx512f,avx512vl")]
28991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28992#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
28993#[rustc_legacy_const_generics(2)]
28994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28995pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28996 unsafe {
28997 static_assert_uimm_bits!(IMM8, 8);
28998 let a: Simd = a.as_i32x8();
28999 let b: Simd = b.as_i32x8();
29000 let imm8: i32 = IMM8 % 8;
29001 let r: i32x8 = match imm8 {
29002 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
29003 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
29004 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
29005 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
29006 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
29007 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
29008 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
29009 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
29010 _ => unreachable_unchecked(),
29011 };
29012 transmute(src:r)
29013 }
29014}
29015
29016/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29017///
29018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
29019#[inline]
29020#[target_feature(enable = "avx512f,avx512vl")]
29021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29022#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29023#[rustc_legacy_const_generics(4)]
29024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29025pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
29026 src: __m256i,
29027 k: __mmask8,
29028 a: __m256i,
29029 b: __m256i,
29030) -> __m256i {
29031 unsafe {
29032 static_assert_uimm_bits!(IMM8, 8);
29033 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
29034 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:src.as_i32x8()))
29035 }
29036}
29037
29038/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29039///
29040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
29041#[inline]
29042#[target_feature(enable = "avx512f,avx512vl")]
29043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29044#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29045#[rustc_legacy_const_generics(3)]
29046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29047pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
29048 k: __mmask8,
29049 a: __m256i,
29050 b: __m256i,
29051) -> __m256i {
29052 unsafe {
29053 static_assert_uimm_bits!(IMM8, 8);
29054 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
29055 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x8(), no:i32x8::ZERO))
29056 }
29057}
29058
29059/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
29060///
29061/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 12 bytes)!</div>
29062///
29063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
29064#[inline]
29065#[target_feature(enable = "avx512f,avx512vl")]
29066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29067#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
29068#[rustc_legacy_const_generics(2)]
29069#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29070pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
29071 unsafe {
29072 static_assert_uimm_bits!(IMM8, 8);
29073 let a: Simd = a.as_i32x4();
29074 let b: Simd = b.as_i32x4();
29075 let imm8: i32 = IMM8 % 4;
29076 let r: i32x4 = match imm8 {
29077 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
29078 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
29079 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
29080 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
29081 _ => unreachable_unchecked(),
29082 };
29083 transmute(src:r)
29084 }
29085}
29086
29087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29088///
29089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
29090#[inline]
29091#[target_feature(enable = "avx512f,avx512vl")]
29092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29093#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29094#[rustc_legacy_const_generics(4)]
29095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29096pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
29097 src: __m128i,
29098 k: __mmask8,
29099 a: __m128i,
29100 b: __m128i,
29101) -> __m128i {
29102 unsafe {
29103 static_assert_uimm_bits!(IMM8, 8);
29104 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
29105 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:src.as_i32x4()))
29106 }
29107}
29108
29109/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29110///
29111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
29112#[inline]
29113#[target_feature(enable = "avx512f,avx512vl")]
29114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29115#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
29116#[rustc_legacy_const_generics(3)]
29117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29118pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
29119 k: __mmask8,
29120 a: __m128i,
29121 b: __m128i,
29122) -> __m128i {
29123 unsafe {
29124 static_assert_uimm_bits!(IMM8, 8);
29125 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
29126 transmute(src:simd_select_bitmask(m:k, yes:r.as_i32x4(), no:i32x4::ZERO))
29127 }
29128}
29129
29130/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
29131///
29132/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 56 bytes)!</div>
29133///
29134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
29135#[inline]
29136#[target_feature(enable = "avx512f")]
29137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29138#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29139#[rustc_legacy_const_generics(2)]
29140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29141pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
29142 unsafe {
29143 static_assert_uimm_bits!(IMM8, 8);
29144 let imm8: i32 = IMM8 % 8;
29145 let r: i64x8 = match imm8 {
29146 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
29147 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
29148 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
29149 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
29150 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
29151 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
29152 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
29153 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
29154 _ => unreachable_unchecked(),
29155 };
29156 transmute(src:r)
29157 }
29158}
29159
29160/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29161///
29162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
29163#[inline]
29164#[target_feature(enable = "avx512f")]
29165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29166#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29167#[rustc_legacy_const_generics(4)]
29168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29169pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
29170 src: __m512i,
29171 k: __mmask8,
29172 a: __m512i,
29173 b: __m512i,
29174) -> __m512i {
29175 unsafe {
29176 static_assert_uimm_bits!(IMM8, 8);
29177 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
29178 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:src.as_i64x8()))
29179 }
29180}
29181
29182/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29183///
29184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
29185#[inline]
29186#[target_feature(enable = "avx512f")]
29187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29188#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29189#[rustc_legacy_const_generics(3)]
29190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29191pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
29192 k: __mmask8,
29193 a: __m512i,
29194 b: __m512i,
29195) -> __m512i {
29196 unsafe {
29197 static_assert_uimm_bits!(IMM8, 8);
29198 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
29199 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x8(), no:i64x8::ZERO))
29200 }
29201}
29202
29203/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
29204///
29205/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 24 bytes)!</div>
29206///
29207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
29208#[inline]
29209#[target_feature(enable = "avx512f,avx512vl")]
29210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29211#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29212#[rustc_legacy_const_generics(2)]
29213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29214pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
29215 unsafe {
29216 static_assert_uimm_bits!(IMM8, 8);
29217 let imm8: i32 = IMM8 % 4;
29218 let r: i64x4 = match imm8 {
29219 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
29220 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
29221 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
29222 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
29223 _ => unreachable_unchecked(),
29224 };
29225 transmute(src:r)
29226 }
29227}
29228
29229/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29230///
29231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
29232#[inline]
29233#[target_feature(enable = "avx512f,avx512vl")]
29234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29235#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29236#[rustc_legacy_const_generics(4)]
29237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29238pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
29239 src: __m256i,
29240 k: __mmask8,
29241 a: __m256i,
29242 b: __m256i,
29243) -> __m256i {
29244 unsafe {
29245 static_assert_uimm_bits!(IMM8, 8);
29246 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
29247 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:src.as_i64x4()))
29248 }
29249}
29250
29251/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
29254#[inline]
29255#[target_feature(enable = "avx512f,avx512vl")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29258#[rustc_legacy_const_generics(3)]
29259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29260pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
29261 k: __mmask8,
29262 a: __m256i,
29263 b: __m256i,
29264) -> __m256i {
29265 unsafe {
29266 static_assert_uimm_bits!(IMM8, 8);
29267 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
29268 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x4(), no:i64x4::ZERO))
29269 }
29270}
29271
29272/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
29273///
29274/// <div class="warning">Only lowest <strong>bit</strong> is used from the mask (shift at maximum by 8 bytes)!</div>
29275///
29276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
29277#[inline]
29278#[target_feature(enable = "avx512f,avx512vl")]
29279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29280#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
29281#[rustc_legacy_const_generics(2)]
29282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29283pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
29284 unsafe {
29285 static_assert_uimm_bits!(IMM8, 8);
29286 let imm8: i32 = IMM8 % 2;
29287 let r: i64x2 = match imm8 {
29288 0 => simd_shuffle!(a, b, [2, 3]),
29289 1 => simd_shuffle!(a, b, [3, 0]),
29290 _ => unreachable_unchecked(),
29291 };
29292 transmute(src:r)
29293 }
29294}
29295
29296/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29297///
29298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
29299#[inline]
29300#[target_feature(enable = "avx512f,avx512vl")]
29301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29302#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29303#[rustc_legacy_const_generics(4)]
29304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29305pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
29306 src: __m128i,
29307 k: __mmask8,
29308 a: __m128i,
29309 b: __m128i,
29310) -> __m128i {
29311 unsafe {
29312 static_assert_uimm_bits!(IMM8, 8);
29313 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
29314 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x2(), no:src.as_i64x2()))
29315 }
29316}
29317
29318/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
29321#[inline]
29322#[target_feature(enable = "avx512f,avx512vl")]
29323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29324#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
29325#[rustc_legacy_const_generics(3)]
29326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29327pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
29328 k: __mmask8,
29329 a: __m128i,
29330 b: __m128i,
29331) -> __m128i {
29332 unsafe {
29333 static_assert_uimm_bits!(IMM8, 8);
29334 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
29335 transmute(src:simd_select_bitmask(m:k, yes:r.as_i64x2(), no:i64x2::ZERO))
29336 }
29337}
29338
29339/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
29340///
29341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
29342#[inline]
29343#[target_feature(enable = "avx512f")]
29344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29345#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
29346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29347pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
29348 unsafe { transmute(src:simd_and(x:a.as_i32x16(), y:b.as_i32x16())) }
29349}
29350
29351/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vpandd))]
29358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29359pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29360 unsafe {
29361 let and: Simd = _mm512_and_epi32(a, b).as_i32x16();
29362 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x16()))
29363 }
29364}
29365
29366/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29367///
29368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
29369#[inline]
29370#[target_feature(enable = "avx512f")]
29371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29372#[cfg_attr(test, assert_instr(vpandd))]
29373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29374pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29375 unsafe {
29376 let and: Simd = _mm512_and_epi32(a, b).as_i32x16();
29377 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x16::ZERO))
29378 }
29379}
29380
29381/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29382///
29383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
29384#[inline]
29385#[target_feature(enable = "avx512f,avx512vl")]
29386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29387#[cfg_attr(test, assert_instr(vpandd))]
29388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29389pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29390 unsafe {
29391 let and: Simd = simd_and(x:a.as_i32x8(), y:b.as_i32x8());
29392 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x8()))
29393 }
29394}
29395
29396/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
29399#[inline]
29400#[target_feature(enable = "avx512f,avx512vl")]
29401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29402#[cfg_attr(test, assert_instr(vpandd))]
29403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29404pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29405 unsafe {
29406 let and: Simd = simd_and(x:a.as_i32x8(), y:b.as_i32x8());
29407 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x8::ZERO))
29408 }
29409}
29410
29411/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29412///
29413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
29414#[inline]
29415#[target_feature(enable = "avx512f,avx512vl")]
29416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29417#[cfg_attr(test, assert_instr(vpandd))]
29418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29419pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29420 unsafe {
29421 let and: Simd = simd_and(x:a.as_i32x4(), y:b.as_i32x4());
29422 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i32x4()))
29423 }
29424}
29425
29426/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29427///
29428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
29429#[inline]
29430#[target_feature(enable = "avx512f,avx512vl")]
29431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29432#[cfg_attr(test, assert_instr(vpandd))]
29433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29434pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29435 unsafe {
29436 let and: Simd = simd_and(x:a.as_i32x4(), y:b.as_i32x4());
29437 transmute(src:simd_select_bitmask(m:k, yes:and, no:i32x4::ZERO))
29438 }
29439}
29440
29441/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
29444#[inline]
29445#[target_feature(enable = "avx512f")]
29446#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29447#[cfg_attr(test, assert_instr(vpandq))]
29448#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29449pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
29450 unsafe { transmute(src:simd_and(x:a.as_i64x8(), y:b.as_i64x8())) }
29451}
29452
29453/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29454///
29455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
29456#[inline]
29457#[target_feature(enable = "avx512f")]
29458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29459#[cfg_attr(test, assert_instr(vpandq))]
29460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29461pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29462 unsafe {
29463 let and: Simd = _mm512_and_epi64(a, b).as_i64x8();
29464 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x8()))
29465 }
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
29471#[inline]
29472#[target_feature(enable = "avx512f")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vpandq))]
29475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29476pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29477 unsafe {
29478 let and: Simd = _mm512_and_epi64(a, b).as_i64x8();
29479 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x8::ZERO))
29480 }
29481}
29482
29483/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29484///
29485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
29486#[inline]
29487#[target_feature(enable = "avx512f,avx512vl")]
29488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29489#[cfg_attr(test, assert_instr(vpandq))]
29490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29491pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29492 unsafe {
29493 let and: Simd = simd_and(x:a.as_i64x4(), y:b.as_i64x4());
29494 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x4()))
29495 }
29496}
29497
29498/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29499///
29500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
29501#[inline]
29502#[target_feature(enable = "avx512f,avx512vl")]
29503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29504#[cfg_attr(test, assert_instr(vpandq))]
29505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29506pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29507 unsafe {
29508 let and: Simd = simd_and(x:a.as_i64x4(), y:b.as_i64x4());
29509 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x4::ZERO))
29510 }
29511}
29512
29513/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29519#[cfg_attr(test, assert_instr(vpandq))]
29520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29521pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29522 unsafe {
29523 let and: Simd = simd_and(x:a.as_i64x2(), y:b.as_i64x2());
29524 transmute(src:simd_select_bitmask(m:k, yes:and, no:src.as_i64x2()))
29525 }
29526}
29527
29528/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29529///
29530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
29531#[inline]
29532#[target_feature(enable = "avx512f,avx512vl")]
29533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29534#[cfg_attr(test, assert_instr(vpandq))]
29535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29536pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29537 unsafe {
29538 let and: Simd = simd_and(x:a.as_i64x2(), y:b.as_i64x2());
29539 transmute(src:simd_select_bitmask(m:k, yes:and, no:i64x2::ZERO))
29540 }
29541}
29542
29543/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
29544///
29545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
29546#[inline]
29547#[target_feature(enable = "avx512f")]
29548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29549#[cfg_attr(test, assert_instr(vpandq))]
29550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29551pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
29552 unsafe { transmute(src:simd_and(x:a.as_i32x16(), y:b.as_i32x16())) }
29553}
29554
29555/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29556///
29557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
29558#[inline]
29559#[target_feature(enable = "avx512f")]
29560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29561#[cfg_attr(test, assert_instr(vporq))]
29562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29563pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
29564 unsafe { transmute(src:simd_or(x:a.as_i32x16(), y:b.as_i32x16())) }
29565}
29566
29567/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29568///
29569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
29570#[inline]
29571#[target_feature(enable = "avx512f")]
29572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29573#[cfg_attr(test, assert_instr(vpord))]
29574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29575pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29576 unsafe {
29577 let or: Simd = _mm512_or_epi32(a, b).as_i32x16();
29578 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x16()))
29579 }
29580}
29581
29582/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29583///
29584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
29585#[inline]
29586#[target_feature(enable = "avx512f")]
29587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29588#[cfg_attr(test, assert_instr(vpord))]
29589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29590pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29591 unsafe {
29592 let or: Simd = _mm512_or_epi32(a, b).as_i32x16();
29593 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x16::ZERO))
29594 }
29595}
29596
29597/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29598///
29599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
29600#[inline]
29601#[target_feature(enable = "avx512f,avx512vl")]
29602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29603#[cfg_attr(test, assert_instr(vor))] //should be vpord
29604#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29605pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
29606 unsafe { transmute(src:simd_or(x:a.as_i32x8(), y:b.as_i32x8())) }
29607}
29608
29609/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29610///
29611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
29612#[inline]
29613#[target_feature(enable = "avx512f,avx512vl")]
29614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29615#[cfg_attr(test, assert_instr(vpord))]
29616#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29617pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29618 unsafe {
29619 let or: Simd = _mm256_or_epi32(a, b).as_i32x8();
29620 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x8()))
29621 }
29622}
29623
29624/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vpord))]
29631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29632pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29633 unsafe {
29634 let or: Simd = _mm256_or_epi32(a, b).as_i32x8();
29635 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x8::ZERO))
29636 }
29637}
29638
29639/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
29640///
29641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
29642#[inline]
29643#[target_feature(enable = "avx512f,avx512vl")]
29644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29645#[cfg_attr(test, assert_instr(vor))] //should be vpord
29646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29647pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
29648 unsafe { transmute(src:simd_or(x:a.as_i32x4(), y:b.as_i32x4())) }
29649}
29650
29651/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29652///
29653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
29654#[inline]
29655#[target_feature(enable = "avx512f,avx512vl")]
29656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29657#[cfg_attr(test, assert_instr(vpord))]
29658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29659pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29660 unsafe {
29661 let or: Simd = _mm_or_epi32(a, b).as_i32x4();
29662 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i32x4()))
29663 }
29664}
29665
29666/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29667///
29668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
29669#[inline]
29670#[target_feature(enable = "avx512f,avx512vl")]
29671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29672#[cfg_attr(test, assert_instr(vpord))]
29673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29674pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29675 unsafe {
29676 let or: Simd = _mm_or_epi32(a, b).as_i32x4();
29677 transmute(src:simd_select_bitmask(m:k, yes:or, no:i32x4::ZERO))
29678 }
29679}
29680
29681/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29682///
29683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
29684#[inline]
29685#[target_feature(enable = "avx512f")]
29686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29687#[cfg_attr(test, assert_instr(vporq))]
29688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29689pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
29690 unsafe { transmute(src:simd_or(x:a.as_i64x8(), y:b.as_i64x8())) }
29691}
29692
29693/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29694///
29695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
29696#[inline]
29697#[target_feature(enable = "avx512f")]
29698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29699#[cfg_attr(test, assert_instr(vporq))]
29700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29701pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29702 unsafe {
29703 let or: Simd = _mm512_or_epi64(a, b).as_i64x8();
29704 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x8()))
29705 }
29706}
29707
29708/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29709///
29710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
29711#[inline]
29712#[target_feature(enable = "avx512f")]
29713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29714#[cfg_attr(test, assert_instr(vporq))]
29715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29716pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29717 unsafe {
29718 let or: Simd = _mm512_or_epi64(a, b).as_i64x8();
29719 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x8::ZERO))
29720 }
29721}
29722
29723/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29724///
29725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
29726#[inline]
29727#[target_feature(enable = "avx512f,avx512vl")]
29728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29729#[cfg_attr(test, assert_instr(vor))] //should be vporq
29730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29731pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
29732 unsafe { transmute(src:simd_or(x:a.as_i64x4(), y:b.as_i64x4())) }
29733}
29734
29735/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29736///
29737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
29738#[inline]
29739#[target_feature(enable = "avx512f,avx512vl")]
29740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29741#[cfg_attr(test, assert_instr(vporq))]
29742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29743pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29744 unsafe {
29745 let or: Simd = _mm256_or_epi64(a, b).as_i64x4();
29746 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x4()))
29747 }
29748}
29749
29750/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29751///
29752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
29753#[inline]
29754#[target_feature(enable = "avx512f,avx512vl")]
29755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29756#[cfg_attr(test, assert_instr(vporq))]
29757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29758pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29759 unsafe {
29760 let or: Simd = _mm256_or_epi64(a, b).as_i64x4();
29761 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x4::ZERO))
29762 }
29763}
29764
29765/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst.
29766///
29767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
29768#[inline]
29769#[target_feature(enable = "avx512f,avx512vl")]
29770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29771#[cfg_attr(test, assert_instr(vor))] //should be vporq
29772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29773pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
29774 unsafe { transmute(src:simd_or(x:a.as_i64x2(), y:b.as_i64x2())) }
29775}
29776
29777/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
29780#[inline]
29781#[target_feature(enable = "avx512f,avx512vl")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783#[cfg_attr(test, assert_instr(vporq))]
29784#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29785pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29786 unsafe {
29787 let or: Simd = _mm_or_epi64(a, b).as_i64x2();
29788 transmute(src:simd_select_bitmask(m:k, yes:or, no:src.as_i64x2()))
29789 }
29790}
29791
29792/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29793///
29794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
29795#[inline]
29796#[target_feature(enable = "avx512f,avx512vl")]
29797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29798#[cfg_attr(test, assert_instr(vporq))]
29799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29800pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29801 unsafe {
29802 let or: Simd = _mm_or_epi64(a, b).as_i64x2();
29803 transmute(src:simd_select_bitmask(m:k, yes:or, no:i64x2::ZERO))
29804 }
29805}
29806
29807/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
29808///
29809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
29810#[inline]
29811#[target_feature(enable = "avx512f")]
29812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29813#[cfg_attr(test, assert_instr(vporq))]
29814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29815pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
29816 unsafe { transmute(src:simd_or(x:a.as_i32x16(), y:b.as_i32x16())) }
29817}
29818
29819/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29820///
29821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
29826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29827pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
29828 unsafe { transmute(src:simd_xor(x:a.as_i32x16(), y:b.as_i32x16())) }
29829}
29830
29831/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29832///
29833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
29834#[inline]
29835#[target_feature(enable = "avx512f")]
29836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29837#[cfg_attr(test, assert_instr(vpxord))]
29838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29839pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29840 unsafe {
29841 let xor: Simd = _mm512_xor_epi32(a, b).as_i32x16();
29842 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x16()))
29843 }
29844}
29845
29846/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29847///
29848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
29849#[inline]
29850#[target_feature(enable = "avx512f")]
29851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29852#[cfg_attr(test, assert_instr(vpxord))]
29853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29854pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
29855 unsafe {
29856 let xor: Simd = _mm512_xor_epi32(a, b).as_i32x16();
29857 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x16::ZERO))
29858 }
29859}
29860
29861/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29862///
29863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
29864#[inline]
29865#[target_feature(enable = "avx512f,avx512vl")]
29866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29867#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29869pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
29870 unsafe { transmute(src:simd_xor(x:a.as_i32x8(), y:b.as_i32x8())) }
29871}
29872
29873/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29874///
29875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
29876#[inline]
29877#[target_feature(enable = "avx512f,avx512vl")]
29878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29879#[cfg_attr(test, assert_instr(vpxord))]
29880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29881pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29882 unsafe {
29883 let xor: Simd = _mm256_xor_epi32(a, b).as_i32x8();
29884 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x8()))
29885 }
29886}
29887
29888/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
29891#[inline]
29892#[target_feature(enable = "avx512f,avx512vl")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpxord))]
29895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29896pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29897 unsafe {
29898 let xor: Simd = _mm256_xor_epi32(a, b).as_i32x8();
29899 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x8::ZERO))
29900 }
29901}
29902
29903/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
29904///
29905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
29906#[inline]
29907#[target_feature(enable = "avx512f,avx512vl")]
29908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29909#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
29910#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29911pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
29912 unsafe { transmute(src:simd_xor(x:a.as_i32x4(), y:b.as_i32x4())) }
29913}
29914
29915/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29916///
29917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
29918#[inline]
29919#[target_feature(enable = "avx512f,avx512vl")]
29920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29921#[cfg_attr(test, assert_instr(vpxord))]
29922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29923pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29924 unsafe {
29925 let xor: Simd = _mm_xor_epi32(a, b).as_i32x4();
29926 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i32x4()))
29927 }
29928}
29929
29930/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpxord))]
29937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29938pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29939 unsafe {
29940 let xor: Simd = _mm_xor_epi32(a, b).as_i32x4();
29941 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i32x4::ZERO))
29942 }
29943}
29944
29945/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29946///
29947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
29948#[inline]
29949#[target_feature(enable = "avx512f")]
29950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29951#[cfg_attr(test, assert_instr(vpxorq))]
29952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29953pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
29954 unsafe { transmute(src:simd_xor(x:a.as_i64x8(), y:b.as_i64x8())) }
29955}
29956
29957/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29958///
29959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
29960#[inline]
29961#[target_feature(enable = "avx512f")]
29962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29963#[cfg_attr(test, assert_instr(vpxorq))]
29964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29965pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29966 unsafe {
29967 let xor: Simd = _mm512_xor_epi64(a, b).as_i64x8();
29968 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x8()))
29969 }
29970}
29971
29972/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978#[cfg_attr(test, assert_instr(vpxorq))]
29979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29980pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
29981 unsafe {
29982 let xor: Simd = _mm512_xor_epi64(a, b).as_i64x8();
29983 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x8::ZERO))
29984 }
29985}
29986
29987/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
29988///
29989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
29990#[inline]
29991#[target_feature(enable = "avx512f,avx512vl")]
29992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29993#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
29994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29995pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
29996 unsafe { transmute(src:simd_xor(x:a.as_i64x4(), y:b.as_i64x4())) }
29997}
29998
29999/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30000///
30001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
30002#[inline]
30003#[target_feature(enable = "avx512f,avx512vl")]
30004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30005#[cfg_attr(test, assert_instr(vpxorq))]
30006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30007pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30008 unsafe {
30009 let xor: Simd = _mm256_xor_epi64(a, b).as_i64x4();
30010 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x4()))
30011 }
30012}
30013
30014/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30015///
30016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
30017#[inline]
30018#[target_feature(enable = "avx512f,avx512vl")]
30019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30020#[cfg_attr(test, assert_instr(vpxorq))]
30021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30022pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30023 unsafe {
30024 let xor: Simd = _mm256_xor_epi64(a, b).as_i64x4();
30025 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x4::ZERO))
30026 }
30027}
30028
30029/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
30030///
30031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
30032#[inline]
30033#[target_feature(enable = "avx512f,avx512vl")]
30034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30035#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
30036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30037pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
30038 unsafe { transmute(src:simd_xor(x:a.as_i64x2(), y:b.as_i64x2())) }
30039}
30040
30041/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
30044#[inline]
30045#[target_feature(enable = "avx512f,avx512vl")]
30046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30047#[cfg_attr(test, assert_instr(vpxorq))]
30048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30049pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30050 unsafe {
30051 let xor: Simd = _mm_xor_epi64(a, b).as_i64x2();
30052 transmute(src:simd_select_bitmask(m:k, yes:xor, no:src.as_i64x2()))
30053 }
30054}
30055
30056/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
30059#[inline]
30060#[target_feature(enable = "avx512f,avx512vl")]
30061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30062#[cfg_attr(test, assert_instr(vpxorq))]
30063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30064pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30065 unsafe {
30066 let xor: Simd = _mm_xor_epi64(a, b).as_i64x2();
30067 transmute(src:simd_select_bitmask(m:k, yes:xor, no:i64x2::ZERO))
30068 }
30069}
30070
30071/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
30072///
30073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
30074#[inline]
30075#[target_feature(enable = "avx512f")]
30076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30077#[cfg_attr(test, assert_instr(vpxorq))]
30078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30079pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
30080 unsafe { transmute(src:simd_xor(x:a.as_i32x16(), y:b.as_i32x16())) }
30081}
30082
30083/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
30084///
30085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
30086#[inline]
30087#[target_feature(enable = "avx512f")]
30088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30089#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30091pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
30092 _mm512_and_epi32(a:_mm512_xor_epi32(a, b:_mm512_set1_epi32(u32::MAX as i32)), b)
30093}
30094
30095/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30096///
30097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
30098#[inline]
30099#[target_feature(enable = "avx512f")]
30100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30101#[cfg_attr(test, assert_instr(vpandnd))]
30102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30103pub const fn _mm512_mask_andnot_epi32(
30104 src: __m512i,
30105 k: __mmask16,
30106 a: __m512i,
30107 b: __m512i,
30108) -> __m512i {
30109 unsafe {
30110 let andnot: Simd = _mm512_andnot_epi32(a, b).as_i32x16();
30111 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x16()))
30112 }
30113}
30114
30115/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30116///
30117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
30118#[inline]
30119#[target_feature(enable = "avx512f")]
30120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30121#[cfg_attr(test, assert_instr(vpandnd))]
30122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30123pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
30124 unsafe {
30125 let andnot: Simd = _mm512_andnot_epi32(a, b).as_i32x16();
30126 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x16::ZERO))
30127 }
30128}
30129
30130/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
30133#[inline]
30134#[target_feature(enable = "avx512f,avx512vl")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vpandnd))]
30137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30138pub const fn _mm256_mask_andnot_epi32(
30139 src: __m256i,
30140 k: __mmask8,
30141 a: __m256i,
30142 b: __m256i,
30143) -> __m256i {
30144 unsafe {
30145 let not: __m256i = _mm256_xor_epi32(a, b:_mm256_set1_epi32(u32::MAX as i32));
30146 let andnot: Simd = simd_and(x:not.as_i32x8(), y:b.as_i32x8());
30147 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x8()))
30148 }
30149}
30150
30151/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30152///
30153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
30154#[inline]
30155#[target_feature(enable = "avx512f,avx512vl")]
30156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30157#[cfg_attr(test, assert_instr(vpandnd))]
30158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30159pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30160 unsafe {
30161 let not: __m256i = _mm256_xor_epi32(a, b:_mm256_set1_epi32(u32::MAX as i32));
30162 let andnot: Simd = simd_and(x:not.as_i32x8(), y:b.as_i32x8());
30163 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x8::ZERO))
30164 }
30165}
30166
30167/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30168///
30169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
30170#[inline]
30171#[target_feature(enable = "avx512f,avx512vl")]
30172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30173#[cfg_attr(test, assert_instr(vpandnd))]
30174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30175pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30176 unsafe {
30177 let not: __m128i = _mm_xor_epi32(a, b:_mm_set1_epi32(u32::MAX as i32));
30178 let andnot: Simd = simd_and(x:not.as_i32x4(), y:b.as_i32x4());
30179 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i32x4()))
30180 }
30181}
30182
30183/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30184///
30185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
30186#[inline]
30187#[target_feature(enable = "avx512f,avx512vl")]
30188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30189#[cfg_attr(test, assert_instr(vpandnd))]
30190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30191pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30192 unsafe {
30193 let not: __m128i = _mm_xor_epi32(a, b:_mm_set1_epi32(u32::MAX as i32));
30194 let andnot: Simd = simd_and(x:not.as_i32x4(), y:b.as_i32x4());
30195 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i32x4::ZERO))
30196 }
30197}
30198
30199/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
30200///
30201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
30202#[inline]
30203#[target_feature(enable = "avx512f")]
30204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30205#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
30206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30207pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
30208 _mm512_and_epi64(a:_mm512_xor_epi64(a, b:_mm512_set1_epi64(u64::MAX as i64)), b)
30209}
30210
30211/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30212///
30213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
30214#[inline]
30215#[target_feature(enable = "avx512f")]
30216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30217#[cfg_attr(test, assert_instr(vpandnq))]
30218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30219pub const fn _mm512_mask_andnot_epi64(
30220 src: __m512i,
30221 k: __mmask8,
30222 a: __m512i,
30223 b: __m512i,
30224) -> __m512i {
30225 unsafe {
30226 let andnot: Simd = _mm512_andnot_epi64(a, b).as_i64x8();
30227 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x8()))
30228 }
30229}
30230
30231/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30232///
30233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
30234#[inline]
30235#[target_feature(enable = "avx512f")]
30236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30237#[cfg_attr(test, assert_instr(vpandnq))]
30238#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30239pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
30240 unsafe {
30241 let andnot: Simd = _mm512_andnot_epi64(a, b).as_i64x8();
30242 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x8::ZERO))
30243 }
30244}
30245
30246/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30247///
30248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
30249#[inline]
30250#[target_feature(enable = "avx512f,avx512vl")]
30251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30252#[cfg_attr(test, assert_instr(vpandnq))]
30253#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30254pub const fn _mm256_mask_andnot_epi64(
30255 src: __m256i,
30256 k: __mmask8,
30257 a: __m256i,
30258 b: __m256i,
30259) -> __m256i {
30260 unsafe {
30261 let not: __m256i = _mm256_xor_epi64(a, b:_mm256_set1_epi64x(u64::MAX as i64));
30262 let andnot: Simd = simd_and(x:not.as_i64x4(), y:b.as_i64x4());
30263 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x4()))
30264 }
30265}
30266
30267/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30268///
30269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
30270#[inline]
30271#[target_feature(enable = "avx512f,avx512vl")]
30272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30273#[cfg_attr(test, assert_instr(vpandnq))]
30274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30275pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
30276 unsafe {
30277 let not: __m256i = _mm256_xor_epi64(a, b:_mm256_set1_epi64x(u64::MAX as i64));
30278 let andnot: Simd = simd_and(x:not.as_i64x4(), y:b.as_i64x4());
30279 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x4::ZERO))
30280 }
30281}
30282
30283/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30284///
30285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
30286#[inline]
30287#[target_feature(enable = "avx512f,avx512vl")]
30288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30289#[cfg_attr(test, assert_instr(vpandnq))]
30290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30291pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30292 unsafe {
30293 let not: __m128i = _mm_xor_epi64(a, b:_mm_set1_epi64x(u64::MAX as i64));
30294 let andnot: Simd = simd_and(x:not.as_i64x2(), y:b.as_i64x2());
30295 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:src.as_i64x2()))
30296 }
30297}
30298
30299/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30300///
30301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
30302#[inline]
30303#[target_feature(enable = "avx512f,avx512vl")]
30304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30305#[cfg_attr(test, assert_instr(vpandnq))]
30306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30307pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
30308 unsafe {
30309 let not: __m128i = _mm_xor_epi64(a, b:_mm_set1_epi64x(u64::MAX as i64));
30310 let andnot: Simd = simd_and(x:not.as_i64x2(), y:b.as_i64x2());
30311 transmute(src:simd_select_bitmask(m:k, yes:andnot, no:i64x2::ZERO))
30312 }
30313}
30314
30315/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
30316///
30317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
30318#[inline]
30319#[target_feature(enable = "avx512f")]
30320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30321#[cfg_attr(test, assert_instr(vpandnq))]
30322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30323pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
30324 _mm512_and_epi64(a:_mm512_xor_epi64(a, b:_mm512_set1_epi64(u64::MAX as i64)), b)
30325}
30326
/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
    // `__mmask16` is a plain 16-bit integer; widening to u32 is a lossless zero-extension.
    a as u32
}
30337
/// Convert 32-bit integer value a to an 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
    // Truncating cast: only the low 16 bits of `a` are kept, matching kmovw semantics.
    a as __mmask16
}
30348
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // `__mmask16` is an ordinary u16, so scalar `&` implements the kandw semantics.
    a & b
}
30360
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kand_mask16`: scalar `&` on the u16 mask representation.
    a & b
}
30372
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Scalar `|` on the u16 mask representation implements the korw semantics.
    a | b
}
30384
/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kor_mask16`: scalar `|` on the u16 mask representation.
    a | b
}
30396
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Scalar `^` on the u16 mask representation implements the kxorw semantics.
    a ^ b
}
30408
/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Legacy alias of `_kxor_mask16`: scalar `^` on the u16 mask representation.
    a ^ b
}
30420
30421/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30428pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
30429 a ^ 0b11111111_11111111
30430}
30431
30432/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30439pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
30440 a ^ 0b11111111_11111111
30441}
30442
30443/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
30450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30451pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30452 _mm512_kand(a:_mm512_knot(a), b)
30453}
30454
30455/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
30456///
30457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
30458#[inline]
30459#[target_feature(enable = "avx512f")]
30460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30461#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandw
30462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30463pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
30464 _mm512_kand(a:_mm512_knot(a), b)
30465}
30466
30467/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30473#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
30474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30475pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
30476 _mm512_knot(_mm512_kxor(a, b))
30477}
30478
30479/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
30480///
30481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
30482#[inline]
30483#[target_feature(enable = "avx512f")]
30484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30485#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kandw
30486#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30487pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
30488 _mm512_knot(_mm512_kxor(a, b))
30489}
30490
30491/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30492/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
30493///
30494/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
30495#[inline]
30496#[target_feature(enable = "avx512f")]
30497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30498#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30499pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
30500 let tmp: u16 = _kor_mask16(a, b);
30501 *all_ones = (tmp == 0xffff) as u8;
30502 (tmp == 0) as u8
30503}
30504
30505/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
30506/// store 0 in dst.
30507///
30508/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
30509#[inline]
30510#[target_feature(enable = "avx512f")]
30511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30513pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30514 (_kor_mask16(a, b) == 0xffff) as u8
30515}
30516
30517/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
30518/// store 0 in dst.
30519///
30520/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
30521#[inline]
30522#[target_feature(enable = "avx512f")]
30523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30525pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
30526 (_kor_mask16(a, b) == 0) as u8
30527}
30528
30529/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
30530///
30531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
30532#[inline]
30533#[target_feature(enable = "avx512f")]
30534#[rustc_legacy_const_generics(1)]
30535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30537pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30538 a.unbounded_shl(COUNT)
30539}
30540
30541/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
30542///
30543/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
30544#[inline]
30545#[target_feature(enable = "avx512f")]
30546#[rustc_legacy_const_generics(1)]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30549pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
30550 a.unbounded_shr(COUNT)
30551}
30552
/// Load 16-bit mask from memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    // Caller must guarantee `mem_addr` is valid for a properly aligned 2-byte read.
    *mem_addr
}
30563
30564/// Store 16-bit mask to memory
30565///
30566/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
30567#[inline]
30568#[target_feature(enable = "avx512f")]
30569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30570#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30571pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
30572 *mem_addr = a;
30573}
30574
30575/// Copy 16-bit mask a to k.
30576///
30577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
30578#[inline]
30579#[target_feature(enable = "avx512f")]
30580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30581#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
30582#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30583pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
30584 a
30585}
30586
30587/// Converts integer mask into bitmask, storing the result in dst.
30588///
30589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
30590#[inline]
30591#[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
30592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30594pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
30595 mask as u16
30596}
30597
30598/// Converts bit mask k1 into an integer value, storing the results in dst.
30599///
30600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
30601#[inline]
30602#[target_feature(enable = "avx512f")]
30603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30604#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
30605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30606pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
30607 k1 as i32
30608}
30609
30610/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
30611///
30612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
30613#[inline]
30614#[target_feature(enable = "avx512f")]
30615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30616#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
30617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30618pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
30619 ((a & 0xff) << 8) | (b & 0xff)
30620}
30621
30622/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
30623///
30624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
30625#[inline]
30626#[target_feature(enable = "avx512f")]
30627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30628#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
30629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30630pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
30631 let r: bool = (a | b) == 0b11111111_11111111;
30632 r as i32
30633}
30634
30635/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
30636///
30637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
30638#[inline]
30639#[target_feature(enable = "avx512f")]
30640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30641#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
30642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30643pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
30644 let r: bool = (a | b) == 0;
30645 r as i32
30646}
30647
30648/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30649///
30650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
30651#[inline]
30652#[target_feature(enable = "avx512f")]
30653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30654#[cfg_attr(test, assert_instr(vptestmd))]
30655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30656pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30657 let and: __m512i = _mm512_and_epi32(a, b);
30658 let zero: __m512i = _mm512_setzero_si512();
30659 _mm512_cmpneq_epi32_mask(a:and, b:zero)
30660}
30661
30662/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30663///
30664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
30665#[inline]
30666#[target_feature(enable = "avx512f")]
30667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30668#[cfg_attr(test, assert_instr(vptestmd))]
30669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30670pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30671 let and: __m512i = _mm512_and_epi32(a, b);
30672 let zero: __m512i = _mm512_setzero_si512();
30673 _mm512_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30674}
30675
30676/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30677///
30678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
30679#[inline]
30680#[target_feature(enable = "avx512f,avx512vl")]
30681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30682#[cfg_attr(test, assert_instr(vptestmd))]
30683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30684pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30685 let and: __m256i = _mm256_and_si256(a, b);
30686 let zero: __m256i = _mm256_setzero_si256();
30687 _mm256_cmpneq_epi32_mask(a:and, b:zero)
30688}
30689
30690/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30691///
30692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
30693#[inline]
30694#[target_feature(enable = "avx512f,avx512vl")]
30695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30696#[cfg_attr(test, assert_instr(vptestmd))]
30697#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30698pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30699 let and: __m256i = _mm256_and_si256(a, b);
30700 let zero: __m256i = _mm256_setzero_si256();
30701 _mm256_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30702}
30703
30704/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30705///
30706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
30707#[inline]
30708#[target_feature(enable = "avx512f,avx512vl")]
30709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30710#[cfg_attr(test, assert_instr(vptestmd))]
30711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30712pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30713 let and: __m128i = _mm_and_si128(a, b);
30714 let zero: __m128i = _mm_setzero_si128();
30715 _mm_cmpneq_epi32_mask(a:and, b:zero)
30716}
30717
30718/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30719///
30720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
30721#[inline]
30722#[target_feature(enable = "avx512f,avx512vl")]
30723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30724#[cfg_attr(test, assert_instr(vptestmd))]
30725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30726pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30727 let and: __m128i = _mm_and_si128(a, b);
30728 let zero: __m128i = _mm_setzero_si128();
30729 _mm_mask_cmpneq_epi32_mask(k1:k, a:and, b:zero)
30730}
30731
30732/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30733///
30734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
30735#[inline]
30736#[target_feature(enable = "avx512f")]
30737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30738#[cfg_attr(test, assert_instr(vptestmq))]
30739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30740pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30741 let and: __m512i = _mm512_and_epi64(a, b);
30742 let zero: __m512i = _mm512_setzero_si512();
30743 _mm512_cmpneq_epi64_mask(a:and, b:zero)
30744}
30745
30746/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30747///
30748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
30749#[inline]
30750#[target_feature(enable = "avx512f")]
30751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30752#[cfg_attr(test, assert_instr(vptestmq))]
30753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30754pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30755 let and: __m512i = _mm512_and_epi64(a, b);
30756 let zero: __m512i = _mm512_setzero_si512();
30757 _mm512_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30758}
30759
30760/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30761///
30762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
30763#[inline]
30764#[target_feature(enable = "avx512f,avx512vl")]
30765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30766#[cfg_attr(test, assert_instr(vptestmq))]
30767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30768pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30769 let and: __m256i = _mm256_and_si256(a, b);
30770 let zero: __m256i = _mm256_setzero_si256();
30771 _mm256_cmpneq_epi64_mask(a:and, b:zero)
30772}
30773
30774/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30775///
30776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
30777#[inline]
30778#[target_feature(enable = "avx512f,avx512vl")]
30779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30780#[cfg_attr(test, assert_instr(vptestmq))]
30781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30782pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30783 let and: __m256i = _mm256_and_si256(a, b);
30784 let zero: __m256i = _mm256_setzero_si256();
30785 _mm256_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30786}
30787
30788/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
30789///
30790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
30791#[inline]
30792#[target_feature(enable = "avx512f,avx512vl")]
30793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30794#[cfg_attr(test, assert_instr(vptestmq))]
30795#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30796pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30797 let and: __m128i = _mm_and_si128(a, b);
30798 let zero: __m128i = _mm_setzero_si128();
30799 _mm_cmpneq_epi64_mask(a:and, b:zero)
30800}
30801
30802/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
30803///
30804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
30805#[inline]
30806#[target_feature(enable = "avx512f,avx512vl")]
30807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30808#[cfg_attr(test, assert_instr(vptestmq))]
30809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30810pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30811 let and: __m128i = _mm_and_si128(a, b);
30812 let zero: __m128i = _mm_setzero_si128();
30813 _mm_mask_cmpneq_epi64_mask(k1:k, a:and, b:zero)
30814}
30815
30816/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30817///
30818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
30819#[inline]
30820#[target_feature(enable = "avx512f")]
30821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30822#[cfg_attr(test, assert_instr(vptestnmd))]
30823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30824pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30825 let and: __m512i = _mm512_and_epi32(a, b);
30826 let zero: __m512i = _mm512_setzero_si512();
30827 _mm512_cmpeq_epi32_mask(a:and, b:zero)
30828}
30829
30830/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[cfg_attr(test, assert_instr(vptestnmd))]
30837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30838pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30839 let and: __m512i = _mm512_and_epi32(a, b);
30840 let zero: __m512i = _mm512_setzero_si512();
30841 _mm512_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30842}
30843
30844/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
30847#[inline]
30848#[target_feature(enable = "avx512f,avx512vl")]
30849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30850#[cfg_attr(test, assert_instr(vptestnmd))]
30851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30852pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30853 let and: __m256i = _mm256_and_si256(a, b);
30854 let zero: __m256i = _mm256_setzero_si256();
30855 _mm256_cmpeq_epi32_mask(a:and, b:zero)
30856}
30857
30858/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30859///
30860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
30861#[inline]
30862#[target_feature(enable = "avx512f,avx512vl")]
30863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30864#[cfg_attr(test, assert_instr(vptestnmd))]
30865#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30866pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30867 let and: __m256i = _mm256_and_si256(a, b);
30868 let zero: __m256i = _mm256_setzero_si256();
30869 _mm256_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30870}
30871
30872/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30873///
30874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
30875#[inline]
30876#[target_feature(enable = "avx512f,avx512vl")]
30877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30878#[cfg_attr(test, assert_instr(vptestnmd))]
30879#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30880pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30881 let and: __m128i = _mm_and_si128(a, b);
30882 let zero: __m128i = _mm_setzero_si128();
30883 _mm_cmpeq_epi32_mask(a:and, b:zero)
30884}
30885
30886/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30887///
30888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
30889#[inline]
30890#[target_feature(enable = "avx512f,avx512vl")]
30891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30892#[cfg_attr(test, assert_instr(vptestnmd))]
30893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30894pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30895 let and: __m128i = _mm_and_si128(a, b);
30896 let zero: __m128i = _mm_setzero_si128();
30897 _mm_mask_cmpeq_epi32_mask(k1:k, a:and, b:zero)
30898}
30899
30900/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30901///
30902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
30903#[inline]
30904#[target_feature(enable = "avx512f")]
30905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30906#[cfg_attr(test, assert_instr(vptestnmq))]
30907#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30908pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30909 let and: __m512i = _mm512_and_epi64(a, b);
30910 let zero: __m512i = _mm512_setzero_si512();
30911 _mm512_cmpeq_epi64_mask(a:and, b:zero)
30912}
30913
30914/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30915///
30916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
30917#[inline]
30918#[target_feature(enable = "avx512f")]
30919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30920#[cfg_attr(test, assert_instr(vptestnmq))]
30921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30922pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30923 let and: __m512i = _mm512_and_epi64(a, b);
30924 let zero: __m512i = _mm512_setzero_si512();
30925 _mm512_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30926}
30927
30928/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30934#[cfg_attr(test, assert_instr(vptestnmq))]
30935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30936pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30937 let and: __m256i = _mm256_and_si256(a, b);
30938 let zero: __m256i = _mm256_setzero_si256();
30939 _mm256_cmpeq_epi64_mask(a:and, b:zero)
30940}
30941
30942/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30943///
30944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
30945#[inline]
30946#[target_feature(enable = "avx512f,avx512vl")]
30947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30948#[cfg_attr(test, assert_instr(vptestnmq))]
30949#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30950pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30951 let and: __m256i = _mm256_and_si256(a, b);
30952 let zero: __m256i = _mm256_setzero_si256();
30953 _mm256_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30954}
30955
30956/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
30957///
30958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
30959#[inline]
30960#[target_feature(enable = "avx512f,avx512vl")]
30961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30962#[cfg_attr(test, assert_instr(vptestnmq))]
30963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30964pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30965 let and: __m128i = _mm_and_si128(a, b);
30966 let zero: __m128i = _mm_setzero_si128();
30967 _mm_cmpeq_epi64_mask(a:and, b:zero)
30968}
30969
30970/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
30973#[inline]
30974#[target_feature(enable = "avx512f,avx512vl")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vptestnmq))]
30977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30978pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30979 let and: __m128i = _mm_and_si128(a, b);
30980 let zero: __m128i = _mm_setzero_si128();
30981 _mm_mask_cmpeq_epi64_mask(k1:k, a:and, b:zero)
30982}
30983
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntps [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntps does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31010
/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntpd [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntpd does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31037
/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Emits `vmovntdq [mem_addr], zmm` via the vps! memory-operand helper macro,
    // so the non-temporal hint cannot be rewritten away by the optimizer.
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        // No stack use; vmovntdq does not modify EFLAGS.
        options(nostack, preserves_flags),
    );
}
31064
/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    // Emits `vmovntdqa zmm, [mem_addr]` via the vpl! memory-operand helper macro;
    // inline asm guarantees the non-temporal hint is preserved by codegen.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        // pure + readonly: result depends only on the loaded memory; no other
        // side effects, no stack adjustment, EFLAGS untouched.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
31083
31084/// Sets packed 32-bit integers in `dst` with the supplied values.
31085///
31086/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
31087#[inline]
31088#[target_feature(enable = "avx512f")]
31089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31091pub const fn _mm512_set_ps(
31092 e0: f32,
31093 e1: f32,
31094 e2: f32,
31095 e3: f32,
31096 e4: f32,
31097 e5: f32,
31098 e6: f32,
31099 e7: f32,
31100 e8: f32,
31101 e9: f32,
31102 e10: f32,
31103 e11: f32,
31104 e12: f32,
31105 e13: f32,
31106 e14: f32,
31107 e15: f32,
31108) -> __m512 {
31109 _mm512_setr_ps(
31110 e0:e15, e1:e14, e2:e13, e3:e12, e4:e11, e5:e10, e6:e9, e7:e8, e8:e7, e9:e6, e10:e5, e11:e4, e12:e3, e13:e2, e14:e1, e15:e0,
31111 )
31112}
31113
31114/// Sets packed 32-bit integers in `dst` with the supplied values in
31115/// reverse order.
31116///
31117/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
31118#[inline]
31119#[target_feature(enable = "avx512f")]
31120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31122pub const fn _mm512_setr_ps(
31123 e0: f32,
31124 e1: f32,
31125 e2: f32,
31126 e3: f32,
31127 e4: f32,
31128 e5: f32,
31129 e6: f32,
31130 e7: f32,
31131 e8: f32,
31132 e9: f32,
31133 e10: f32,
31134 e11: f32,
31135 e12: f32,
31136 e13: f32,
31137 e14: f32,
31138 e15: f32,
31139) -> __m512 {
31140 unsafe {
31141 let r: Simd = f32x16::new(
31142 x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7, x8:e8, x9:e9, x10:e10, x11:e11, x12:e12, x13:e13, x14:e14, x15:e15,
31143 );
31144 transmute(src:r)
31145 }
31146}
31147
31148/// Broadcast 64-bit float `a` to all elements of `dst`.
31149///
31150/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
31151#[inline]
31152#[target_feature(enable = "avx512f")]
31153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31155pub const fn _mm512_set1_pd(a: f64) -> __m512d {
31156 unsafe { transmute(src:f64x8::splat(a)) }
31157}
31158
31159/// Broadcast 32-bit float `a` to all elements of `dst`.
31160///
31161/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
31162#[inline]
31163#[target_feature(enable = "avx512f")]
31164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31165#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31166pub const fn _mm512_set1_ps(a: f32) -> __m512 {
31167 unsafe { transmute(src:f32x16::splat(a)) }
31168}
31169
31170/// Sets packed 32-bit integers in `dst` with the supplied values.
31171///
31172/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
31173#[inline]
31174#[target_feature(enable = "avx512f")]
31175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31176#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31177pub const fn _mm512_set_epi32(
31178 e15: i32,
31179 e14: i32,
31180 e13: i32,
31181 e12: i32,
31182 e11: i32,
31183 e10: i32,
31184 e9: i32,
31185 e8: i32,
31186 e7: i32,
31187 e6: i32,
31188 e5: i32,
31189 e4: i32,
31190 e3: i32,
31191 e2: i32,
31192 e1: i32,
31193 e0: i32,
31194) -> __m512i {
31195 _mm512_setr_epi32(
31196 e15:e0, e14:e1, e13:e2, e12:e3, e11:e4, e10:e5, e9:e6, e8:e7, e7:e8, e6:e9, e5:e10, e4:e11, e3:e12, e2:e13, e1:e14, e0:e15,
31197 )
31198}
31199
31200/// Broadcast 8-bit integer a to all elements of dst.
31201///
31202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
31203#[inline]
31204#[target_feature(enable = "avx512f")]
31205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31207pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
31208 unsafe { transmute(src:i8x64::splat(a)) }
31209}
31210
31211/// Broadcast the low packed 16-bit integer from a to all elements of dst.
31212///
31213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
31214#[inline]
31215#[target_feature(enable = "avx512f")]
31216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31217#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31218pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
31219 unsafe { transmute(src:i16x32::splat(a)) }
31220}
31221
31222/// Broadcast 32-bit integer `a` to all elements of `dst`.
31223///
31224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
31225#[inline]
31226#[target_feature(enable = "avx512f")]
31227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31229pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
31230 unsafe { transmute(src:i32x16::splat(a)) }
31231}
31232
31233/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31234///
31235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
31236#[inline]
31237#[target_feature(enable = "avx512f")]
31238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31239#[cfg_attr(test, assert_instr(vpbroadcastd))]
31240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31241pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
31242 unsafe {
31243 let r: Simd = _mm512_set1_epi32(a).as_i32x16();
31244 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x16()))
31245 }
31246}
31247
31248/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31249///
31250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
31251#[inline]
31252#[target_feature(enable = "avx512f")]
31253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31254#[cfg_attr(test, assert_instr(vpbroadcastd))]
31255#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31256pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
31257 unsafe {
31258 let r: Simd = _mm512_set1_epi32(a).as_i32x16();
31259 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x16::ZERO))
31260 }
31261}
31262
31263/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
31266#[inline]
31267#[target_feature(enable = "avx512f,avx512vl")]
31268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31269#[cfg_attr(test, assert_instr(vpbroadcastd))]
31270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31271pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
31272 unsafe {
31273 let r: Simd = _mm256_set1_epi32(a).as_i32x8();
31274 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x8()))
31275 }
31276}
31277
31278/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpbroadcastd))]
31285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31286pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
31287 unsafe {
31288 let r: Simd = _mm256_set1_epi32(a).as_i32x8();
31289 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x8::ZERO))
31290 }
31291}
31292
31293/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31294///
31295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
31296#[inline]
31297#[target_feature(enable = "avx512f,avx512vl")]
31298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31299#[cfg_attr(test, assert_instr(vpbroadcastd))]
31300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31301pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
31302 unsafe {
31303 let r: Simd = _mm_set1_epi32(a).as_i32x4();
31304 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i32x4()))
31305 }
31306}
31307
31308/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31309///
31310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
31311#[inline]
31312#[target_feature(enable = "avx512f,avx512vl")]
31313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31314#[cfg_attr(test, assert_instr(vpbroadcastd))]
31315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31316pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
31317 unsafe {
31318 let r: Simd = _mm_set1_epi32(a).as_i32x4();
31319 transmute(src:simd_select_bitmask(m:k, yes:r, no:i32x4::ZERO))
31320 }
31321}
31322
31323/// Broadcast 64-bit integer `a` to all elements of `dst`.
31324///
31325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
31326#[inline]
31327#[target_feature(enable = "avx512f")]
31328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31330pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
31331 unsafe { transmute(src:i64x8::splat(a)) }
31332}
31333
31334/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31335///
31336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
31337#[inline]
31338#[target_feature(enable = "avx512f")]
31339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31340#[cfg_attr(test, assert_instr(vpbroadcastq))]
31341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31342pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
31343 unsafe {
31344 let r: Simd = _mm512_set1_epi64(a).as_i64x8();
31345 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x8()))
31346 }
31347}
31348
31349/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31350///
31351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
31352#[inline]
31353#[target_feature(enable = "avx512f")]
31354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31355#[cfg_attr(test, assert_instr(vpbroadcastq))]
31356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31357pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
31358 unsafe {
31359 let r: Simd = _mm512_set1_epi64(a).as_i64x8();
31360 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
31361 }
31362}
31363
31364/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31365///
31366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
31367#[inline]
31368#[target_feature(enable = "avx512f,avx512vl")]
31369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31370#[cfg_attr(test, assert_instr(vpbroadcastq))]
31371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31372pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
31373 unsafe {
31374 let r: Simd = _mm256_set1_epi64x(a).as_i64x4();
31375 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x4()))
31376 }
31377}
31378
31379/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31380///
31381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
31382#[inline]
31383#[target_feature(enable = "avx512f,avx512vl")]
31384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31385#[cfg_attr(test, assert_instr(vpbroadcastq))]
31386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31387pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
31388 unsafe {
31389 let r: Simd = _mm256_set1_epi64x(a).as_i64x4();
31390 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
31391 }
31392}
31393
31394/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
31395///
31396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
31397#[inline]
31398#[target_feature(enable = "avx512f,avx512vl")]
31399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31400#[cfg_attr(test, assert_instr(vpbroadcastq))]
31401#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31402pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
31403 unsafe {
31404 let r: Simd = _mm_set1_epi64x(a).as_i64x2();
31405 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_i64x2()))
31406 }
31407}
31408
31409/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
31410///
31411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
31412#[inline]
31413#[target_feature(enable = "avx512f,avx512vl")]
31414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31415#[cfg_attr(test, assert_instr(vpbroadcastq))]
31416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31417pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
31418 unsafe {
31419 let r: Simd = _mm_set1_epi64x(a).as_i64x2();
31420 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
31421 }
31422}
31423
31424/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
31425///
31426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
31427#[inline]
31428#[target_feature(enable = "avx512f")]
31429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31431pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
31432 _mm512_set_epi64(e0:d, e1:c, e2:b, e3:a, e4:d, e5:c, e6:b, e7:a)
31433}
31434
31435/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
31436///
31437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
31438#[inline]
31439#[target_feature(enable = "avx512f")]
31440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
31442pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
31443 _mm512_set_epi64(e0:a, e1:b, e2:c, e3:d, e4:a, e5:b, e6:c, e7:d)
31444}
31445
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_LT_OS` (less-than, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}
31456
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_LT_OS` (less-than, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}
31467
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NLT_US` (not-less-than, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}
31478
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NLT_US` (not-less-than, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}
31489
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_LE_OS` (less-than-or-equal, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}
31500
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_LE_OS` (less-than-or-equal, ordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}
31511
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NLE_US` (not-less-than-or-equal, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}
31522
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NLE_US` (not-less-than-or-equal, unordered, signaling) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}
31533
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_EQ_OQ` (equal, ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}
31544
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_EQ_OQ` (equal, ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}
31555
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_NEQ_UQ` (not-equal, unordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}
31566
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_NEQ_UQ` (not-equal, unordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}
31577
31578/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31579///
31580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
31581#[inline]
31582#[target_feature(enable = "avx512f")]
31583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31584#[rustc_legacy_const_generics(2)]
31585#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31586pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
31587 unsafe {
31588 static_assert_uimm_bits!(IMM8, 5);
31589 let neg_one: i16 = -1;
31590 let a: Simd = a.as_f32x16();
31591 let b: Simd = b.as_f32x16();
31592 let r: i16 = vcmpps(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
31593 r.cast_unsigned()
31594 }
31595}
31596
31597/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31598///
31599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
31600#[inline]
31601#[target_feature(enable = "avx512f")]
31602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31603#[rustc_legacy_const_generics(3)]
31604#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31605pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
31606 unsafe {
31607 static_assert_uimm_bits!(IMM8, 5);
31608 let a: Simd = a.as_f32x16();
31609 let b: Simd = b.as_f32x16();
31610 let r: i16 = vcmpps(a, b, IMM8, m:k1 as i16, _MM_FROUND_CUR_DIRECTION);
31611 r.cast_unsigned()
31612 }
31613}
31614
31615/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31616///
31617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
31618#[inline]
31619#[target_feature(enable = "avx512f,avx512vl")]
31620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31621#[rustc_legacy_const_generics(2)]
31622#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31623pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
31624 unsafe {
31625 static_assert_uimm_bits!(IMM8, 5);
31626 let neg_one: i8 = -1;
31627 let a: Simd = a.as_f32x8();
31628 let b: Simd = b.as_f32x8();
31629 let r: i8 = vcmpps256(a, b, IMM8, m:neg_one);
31630 r.cast_unsigned()
31631 }
31632}
31633
31634/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31635///
31636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
31637#[inline]
31638#[target_feature(enable = "avx512f,avx512vl")]
31639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31640#[rustc_legacy_const_generics(3)]
31641#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31642pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
31643 unsafe {
31644 static_assert_uimm_bits!(IMM8, 5);
31645 let a: Simd = a.as_f32x8();
31646 let b: Simd = b.as_f32x8();
31647 let r: i8 = vcmpps256(a, b, IMM8, m:k1 as i8);
31648 r.cast_unsigned()
31649 }
31650}
31651
31652/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31653///
31654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
31655#[inline]
31656#[target_feature(enable = "avx512f,avx512vl")]
31657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31658#[rustc_legacy_const_generics(2)]
31659#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31660pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
31661 unsafe {
31662 static_assert_uimm_bits!(IMM8, 5);
31663 let neg_one: i8 = -1;
31664 let a: Simd = a.as_f32x4();
31665 let b: Simd = b.as_f32x4();
31666 let r: i8 = vcmpps128(a, b, IMM8, m:neg_one);
31667 r.cast_unsigned()
31668 }
31669}
31670
31671/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31672///
31673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
31674#[inline]
31675#[target_feature(enable = "avx512f,avx512vl")]
31676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31677#[rustc_legacy_const_generics(3)]
31678#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31679pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
31680 unsafe {
31681 static_assert_uimm_bits!(IMM8, 5);
31682 let a: Simd = a.as_f32x4();
31683 let b: Simd = b.as_f32x4();
31684 let r: i8 = vcmpps128(a, b, IMM8, m:k1 as i8);
31685 r.cast_unsigned()
31686 }
31687}
31688
31689/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
31690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31691///
31692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
31693#[inline]
31694#[target_feature(enable = "avx512f")]
31695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31696#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31697#[rustc_legacy_const_generics(2, 3)]
31698pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31699 a: __m512,
31700 b: __m512,
31701) -> __mmask16 {
31702 unsafe {
31703 static_assert_uimm_bits!(IMM5, 5);
31704 static_assert_mantissas_sae!(SAE);
31705 let neg_one: i16 = -1;
31706 let a: Simd = a.as_f32x16();
31707 let b: Simd = b.as_f32x16();
31708 let r: i16 = vcmpps(a, b, IMM5, m:neg_one, SAE);
31709 r.cast_unsigned()
31710 }
31711}
31712
31713/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
31714/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
31715///
31716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
31717#[inline]
31718#[target_feature(enable = "avx512f")]
31719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31720#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
31721#[rustc_legacy_const_generics(3, 4)]
31722pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
31723 m: __mmask16,
31724 a: __m512,
31725 b: __m512,
31726) -> __mmask16 {
31727 unsafe {
31728 static_assert_uimm_bits!(IMM5, 5);
31729 static_assert_mantissas_sae!(SAE);
31730 let a: Simd = a.as_f32x16();
31731 let b: Simd = b.as_f32x16();
31732 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
31733 r.cast_unsigned()
31734 }
31735}
31736
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// Equivalent to [`_mm512_cmp_ps_mask`] with the `_CMP_ORD_Q` (ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}
31747
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// Equivalent to [`_mm512_mask_cmp_ps_mask`] with the `_CMP_ORD_Q` (ordered, quiet) predicate.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] // should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}
31758
31759/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
31760///
31761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
31762#[inline]
31763#[target_feature(enable = "avx512f")]
31764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31765#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
31766pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
31767 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
31768}
31769
31770/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31771///
31772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
31773#[inline]
31774#[target_feature(enable = "avx512f")]
31775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31776#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
31777pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
31778 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
31779}
31780
31781/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
31782///
31783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
31784#[inline]
31785#[target_feature(enable = "avx512f")]
31786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31787#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31788pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31789 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
31790}
31791
31792/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31793///
31794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
31795#[inline]
31796#[target_feature(enable = "avx512f")]
31797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31798#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31799pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31800 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
31801}
31802
31803/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
31804///
31805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
31806#[inline]
31807#[target_feature(enable = "avx512f")]
31808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31809#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31810pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31811 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
31812}
31813
31814/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31815///
31816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
31817#[inline]
31818#[target_feature(enable = "avx512f")]
31819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31820#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31821pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31822 _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(k1:m, a, b)
31823}
31824
31825/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
31826///
31827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
31828#[inline]
31829#[target_feature(enable = "avx512f")]
31830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31831#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31832pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31833 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
31834}
31835
31836/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31837///
31838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
31839#[inline]
31840#[target_feature(enable = "avx512f")]
31841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31842#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31843pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31844 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
31845}
31846
31847/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
31848///
31849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
31850#[inline]
31851#[target_feature(enable = "avx512f")]
31852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31853#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31854pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31855 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
31856}
31857
31858/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31859///
31860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
31861#[inline]
31862#[target_feature(enable = "avx512f")]
31863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31864#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31865pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31866 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
31867}
31868
31869/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
31870///
31871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
31872#[inline]
31873#[target_feature(enable = "avx512f")]
31874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31875#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31876pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31877 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
31878}
31879
31880/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31886#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31887pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31888 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
31889}
31890
31891/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
31892///
31893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
31894#[inline]
31895#[target_feature(enable = "avx512f")]
31896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31897#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31898pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
31899 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
31900}
31901
31902/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31903///
31904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
31905#[inline]
31906#[target_feature(enable = "avx512f")]
31907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31908#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
31909pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31910 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
31911}
31912
31913/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31914///
31915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
31916#[inline]
31917#[target_feature(enable = "avx512f")]
31918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31919#[rustc_legacy_const_generics(2)]
31920#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31921pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
31922 unsafe {
31923 static_assert_uimm_bits!(IMM8, 5);
31924 let neg_one: i8 = -1;
31925 let a: Simd = a.as_f64x8();
31926 let b: Simd = b.as_f64x8();
31927 let r: i8 = vcmppd(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
31928 r.cast_unsigned()
31929 }
31930}
31931
31932/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31933///
31934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
31935#[inline]
31936#[target_feature(enable = "avx512f")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(3)]
31939#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31940pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
31941 unsafe {
31942 static_assert_uimm_bits!(IMM8, 5);
31943 let a: Simd = a.as_f64x8();
31944 let b: Simd = b.as_f64x8();
31945 let r: i8 = vcmppd(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
31946 r.cast_unsigned()
31947 }
31948}
31949
31950/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31951///
31952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
31953#[inline]
31954#[target_feature(enable = "avx512f,avx512vl")]
31955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31956#[rustc_legacy_const_generics(2)]
31957#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31958pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
31959 unsafe {
31960 static_assert_uimm_bits!(IMM8, 5);
31961 let neg_one: i8 = -1;
31962 let a: Simd = a.as_f64x4();
31963 let b: Simd = b.as_f64x4();
31964 let r: i8 = vcmppd256(a, b, IMM8, m:neg_one);
31965 r.cast_unsigned()
31966 }
31967}
31968
31969/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31970///
31971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
31972#[inline]
31973#[target_feature(enable = "avx512f,avx512vl")]
31974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31975#[rustc_legacy_const_generics(3)]
31976#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31977pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
31978 unsafe {
31979 static_assert_uimm_bits!(IMM8, 5);
31980 let a: Simd = a.as_f64x4();
31981 let b: Simd = b.as_f64x4();
31982 let r: i8 = vcmppd256(a, b, IMM8, m:k1 as i8);
31983 r.cast_unsigned()
31984 }
31985}
31986
31987/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31988///
31989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
31990#[inline]
31991#[target_feature(enable = "avx512f,avx512vl")]
31992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31993#[rustc_legacy_const_generics(2)]
31994#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
31995pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
31996 unsafe {
31997 static_assert_uimm_bits!(IMM8, 5);
31998 let neg_one: i8 = -1;
31999 let a: Simd = a.as_f64x2();
32000 let b: Simd = b.as_f64x2();
32001 let r: i8 = vcmppd128(a, b, IMM8, m:neg_one);
32002 r.cast_unsigned()
32003 }
32004}
32005
32006/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32007///
32008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
32009#[inline]
32010#[target_feature(enable = "avx512f,avx512vl")]
32011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32012#[rustc_legacy_const_generics(3)]
32013#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32014pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32015 unsafe {
32016 static_assert_uimm_bits!(IMM8, 5);
32017 let a: Simd = a.as_f64x2();
32018 let b: Simd = b.as_f64x2();
32019 let r: i8 = vcmppd128(a, b, IMM8, m:k1 as i8);
32020 r.cast_unsigned()
32021 }
32022}
32023
32024/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
32025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32026///
32027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
32028#[inline]
32029#[target_feature(enable = "avx512f")]
32030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32031#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32032#[rustc_legacy_const_generics(2, 3)]
32033pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32034 a: __m512d,
32035 b: __m512d,
32036) -> __mmask8 {
32037 unsafe {
32038 static_assert_uimm_bits!(IMM5, 5);
32039 static_assert_mantissas_sae!(SAE);
32040 let neg_one: i8 = -1;
32041 let a: Simd = a.as_f64x8();
32042 let b: Simd = b.as_f64x8();
32043 let r: i8 = vcmppd(a, b, IMM5, m:neg_one, SAE);
32044 r.cast_unsigned()
32045 }
32046}
32047
32048/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
32049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32050///
32051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
32052#[inline]
32053#[target_feature(enable = "avx512f")]
32054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32055#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32056#[rustc_legacy_const_generics(3, 4)]
32057pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
32058 k1: __mmask8,
32059 a: __m512d,
32060 b: __m512d,
32061) -> __mmask8 {
32062 unsafe {
32063 static_assert_uimm_bits!(IMM5, 5);
32064 static_assert_mantissas_sae!(SAE);
32065 let a: Simd = a.as_f64x8();
32066 let b: Simd = b.as_f64x8();
32067 let r: i8 = vcmppd(a, b, IMM5, m:k1 as i8, SAE);
32068 r.cast_unsigned()
32069 }
32070}
32071
32072/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
32075#[inline]
32076#[target_feature(enable = "avx512f")]
32077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32078#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32079pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
32080 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
32081}
32082
32083/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
32086#[inline]
32087#[target_feature(enable = "avx512f")]
32088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32089#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32090pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
32091 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
32092}
32093
32094/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
32097#[inline]
32098#[target_feature(enable = "avx512f")]
32099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32100#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32101pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
32102 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
32103}
32104
32105/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
32108#[inline]
32109#[target_feature(enable = "avx512f")]
32110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32111#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
32112pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
32113 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
32114}
32115
32116/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[rustc_legacy_const_generics(2)]
32123#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32124pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
32125 unsafe {
32126 static_assert_uimm_bits!(IMM8, 5);
32127 let neg_one: i8 = -1;
32128 let r: i8 = vcmpss(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
32129 r.cast_unsigned()
32130 }
32131}
32132
32133/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32134///
32135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
32136#[inline]
32137#[target_feature(enable = "avx512f")]
32138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32139#[rustc_legacy_const_generics(3)]
32140#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32141pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
32142 unsafe {
32143 static_assert_uimm_bits!(IMM8, 5);
32144 let r: i8 = vcmpss(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
32145 r.cast_unsigned()
32146 }
32147}
32148
32149/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32151///
32152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
32153#[inline]
32154#[target_feature(enable = "avx512f")]
32155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32156#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32157#[rustc_legacy_const_generics(2, 3)]
32158pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
32159 unsafe {
32160 static_assert_uimm_bits!(IMM5, 5);
32161 static_assert_mantissas_sae!(SAE);
32162 let neg_one: i8 = -1;
32163 let r: i8 = vcmpss(a, b, IMM5, m:neg_one, SAE);
32164 r.cast_unsigned()
32165 }
32166}
32167
32168/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\
32169/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32170///
32171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
32172#[inline]
32173#[target_feature(enable = "avx512f")]
32174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32175#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32176#[rustc_legacy_const_generics(3, 4)]
32177pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
32178 k1: __mmask8,
32179 a: __m128,
32180 b: __m128,
32181) -> __mmask8 {
32182 unsafe {
32183 static_assert_uimm_bits!(IMM5, 5);
32184 static_assert_mantissas_sae!(SAE);
32185 let r: i8 = vcmpss(a, b, IMM5, m:k1 as i8, SAE);
32186 r.cast_unsigned()
32187 }
32188}
32189
32190/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
32191///
32192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
32193#[inline]
32194#[target_feature(enable = "avx512f")]
32195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32196#[rustc_legacy_const_generics(2)]
32197#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32198pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32199 unsafe {
32200 static_assert_uimm_bits!(IMM8, 5);
32201 let neg_one: i8 = -1;
32202 let r: i8 = vcmpsd(a, b, IMM8, m:neg_one, _MM_FROUND_CUR_DIRECTION);
32203 r.cast_unsigned()
32204 }
32205}
32206
32207/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
32208///
32209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
32210#[inline]
32211#[target_feature(enable = "avx512f")]
32212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32213#[rustc_legacy_const_generics(3)]
32214#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
32215pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
32216 unsafe {
32217 static_assert_uimm_bits!(IMM8, 5);
32218 let r: i8 = vcmpsd(a, b, IMM8, m:k1 as i8, _MM_FROUND_CUR_DIRECTION);
32219 r.cast_unsigned()
32220 }
32221}
32222
32223/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
32224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32225///
32226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
32227#[inline]
32228#[target_feature(enable = "avx512f")]
32229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32230#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32231#[rustc_legacy_const_generics(2, 3)]
32232pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
32233 unsafe {
32234 static_assert_uimm_bits!(IMM5, 5);
32235 static_assert_mantissas_sae!(SAE);
32236 let neg_one: i8 = -1;
32237 let r: i8 = vcmpsd(a, b, IMM5, m:neg_one, SAE);
32238 r.cast_unsigned()
32239 }
32240}
32241
32242/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
32243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
32244///
32245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
32246#[inline]
32247#[target_feature(enable = "avx512f")]
32248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32249#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
32250#[rustc_legacy_const_generics(3, 4)]
32251pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
32252 k1: __mmask8,
32253 a: __m128d,
32254 b: __m128d,
32255) -> __mmask8 {
32256 unsafe {
32257 static_assert_uimm_bits!(IMM5, 5);
32258 static_assert_mantissas_sae!(SAE);
32259 let r: i8 = vcmpsd(a, b, IMM5, m:k1 as i8, SAE);
32260 r.cast_unsigned()
32261 }
32262}
32263
32264/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32265///
32266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
32267#[inline]
32268#[target_feature(enable = "avx512f")]
32269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32270#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32272pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32273 unsafe { simd_bitmask::<u32x16, _>(simd_lt(x:a.as_u32x16(), y:b.as_u32x16())) }
32274}
32275
32276/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32277///
32278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
32279#[inline]
32280#[target_feature(enable = "avx512f")]
32281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32282#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32284pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32285 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32286}
32287
32288/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32289///
32290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
32291#[inline]
32292#[target_feature(enable = "avx512f,avx512vl")]
32293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32294#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32296pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32297 unsafe { simd_bitmask::<u32x8, _>(simd_lt(x:a.as_u32x8(), y:b.as_u32x8())) }
32298}
32299
32300/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32301///
32302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
32303#[inline]
32304#[target_feature(enable = "avx512f,avx512vl")]
32305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32306#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32308pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32309 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32310}
32311
32312/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32313///
32314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
32315#[inline]
32316#[target_feature(enable = "avx512f,avx512vl")]
32317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32318#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32320pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32321 unsafe { simd_bitmask::<u32x4, _>(simd_lt(x:a.as_u32x4(), y:b.as_u32x4())) }
32322}
32323
32324/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32325///
32326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
32327#[inline]
32328#[target_feature(enable = "avx512f,avx512vl")]
32329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32330#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32332pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32333 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
32339#[inline]
32340#[target_feature(enable = "avx512f")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32344pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32345 unsafe { simd_bitmask::<u32x16, _>(simd_gt(x:a.as_u32x16(), y:b.as_u32x16())) }
32346}
32347
32348/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32349///
32350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
32351#[inline]
32352#[target_feature(enable = "avx512f")]
32353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32354#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32355#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32356pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32357 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
32363#[inline]
32364#[target_feature(enable = "avx512f,avx512vl")]
32365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32368pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32369 unsafe { simd_bitmask::<u32x8, _>(simd_gt(x:a.as_u32x8(), y:b.as_u32x8())) }
32370}
32371
32372/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32373///
32374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
32375#[inline]
32376#[target_feature(enable = "avx512f,avx512vl")]
32377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32378#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32380pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32381 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32382}
32383
32384/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32385///
32386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
32387#[inline]
32388#[target_feature(enable = "avx512f,avx512vl")]
32389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32390#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32392pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32393 unsafe { simd_bitmask::<u32x4, _>(simd_gt(x:a.as_u32x4(), y:b.as_u32x4())) }
32394}
32395
32396/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32397///
32398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
32399#[inline]
32400#[target_feature(enable = "avx512f,avx512vl")]
32401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32402#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32404pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32405 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32406}
32407
32408/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32409///
32410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
32411#[inline]
32412#[target_feature(enable = "avx512f")]
32413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32414#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32416pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32417 unsafe { simd_bitmask::<u32x16, _>(simd_le(x:a.as_u32x16(), y:b.as_u32x16())) }
32418}
32419
32420/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32421///
32422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
32423#[inline]
32424#[target_feature(enable = "avx512f")]
32425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32426#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32428pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32429 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32430}
32431
32432/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32433///
32434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
32435#[inline]
32436#[target_feature(enable = "avx512f,avx512vl")]
32437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32438#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32440pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32441 unsafe { simd_bitmask::<u32x8, _>(simd_le(x:a.as_u32x8(), y:b.as_u32x8())) }
32442}
32443
32444/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32445///
32446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
32447#[inline]
32448#[target_feature(enable = "avx512f,avx512vl")]
32449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32450#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32452pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32453 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32454}
32455
32456/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32457///
32458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
32459#[inline]
32460#[target_feature(enable = "avx512f,avx512vl")]
32461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32462#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32464pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32465 unsafe { simd_bitmask::<u32x4, _>(simd_le(x:a.as_u32x4(), y:b.as_u32x4())) }
32466}
32467
32468/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32469///
32470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
32471#[inline]
32472#[target_feature(enable = "avx512f,avx512vl")]
32473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32474#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32476pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32477 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32478}
32479
32480/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32481///
32482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
32483#[inline]
32484#[target_feature(enable = "avx512f")]
32485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32486#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32487#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32488pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32489 unsafe { simd_bitmask::<u32x16, _>(simd_ge(x:a.as_u32x16(), y:b.as_u32x16())) }
32490}
32491
32492/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32493///
32494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
32495#[inline]
32496#[target_feature(enable = "avx512f")]
32497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32498#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32499#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32500pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32501 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32502}
32503
32504/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32505///
32506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
32507#[inline]
32508#[target_feature(enable = "avx512f,avx512vl")]
32509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32510#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32512pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32513 unsafe { simd_bitmask::<u32x8, _>(simd_ge(x:a.as_u32x8(), y:b.as_u32x8())) }
32514}
32515
32516/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32517///
32518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
32519#[inline]
32520#[target_feature(enable = "avx512f,avx512vl")]
32521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32522#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32524pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32525 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32526}
32527
32528/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32529///
32530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
32531#[inline]
32532#[target_feature(enable = "avx512f,avx512vl")]
32533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32534#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32536pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32537 unsafe { simd_bitmask::<u32x4, _>(simd_ge(x:a.as_u32x4(), y:b.as_u32x4())) }
32538}
32539
32540/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32541///
32542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
32543#[inline]
32544#[target_feature(enable = "avx512f,avx512vl")]
32545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32546#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32548pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32549 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32550}
32551
32552/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32553///
32554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
32555#[inline]
32556#[target_feature(enable = "avx512f")]
32557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32558#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32560pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32561 unsafe { simd_bitmask::<u32x16, _>(simd_eq(x:a.as_u32x16(), y:b.as_u32x16())) }
32562}
32563
32564/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
32567#[inline]
32568#[target_feature(enable = "avx512f")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32572pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32573 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32574}
32575
32576/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32577///
32578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
32579#[inline]
32580#[target_feature(enable = "avx512f,avx512vl")]
32581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32584pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32585 unsafe { simd_bitmask::<u32x8, _>(simd_eq(x:a.as_u32x8(), y:b.as_u32x8())) }
32586}
32587
32588/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32589///
32590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
32591#[inline]
32592#[target_feature(enable = "avx512f,avx512vl")]
32593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32594#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32596pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32597 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32598}
32599
32600/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32601///
32602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
32603#[inline]
32604#[target_feature(enable = "avx512f,avx512vl")]
32605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32608pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32609 unsafe { simd_bitmask::<u32x4, _>(simd_eq(x:a.as_u32x4(), y:b.as_u32x4())) }
32610}
32611
32612/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32613///
32614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
32615#[inline]
32616#[target_feature(enable = "avx512f,avx512vl")]
32617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32618#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32620pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32621 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32622}
32623
32624/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32625///
32626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
32627#[inline]
32628#[target_feature(enable = "avx512f")]
32629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32630#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32632pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32633 unsafe { simd_bitmask::<u32x16, _>(simd_ne(x:a.as_u32x16(), y:b.as_u32x16())) }
32634}
32635
32636/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
32639#[inline]
32640#[target_feature(enable = "avx512f")]
32641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32643#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32644pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32645 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32646}
32647
32648/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32649///
32650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
32651#[inline]
32652#[target_feature(enable = "avx512f,avx512vl")]
32653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32654#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32656pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32657 unsafe { simd_bitmask::<u32x8, _>(simd_ne(x:a.as_u32x8(), y:b.as_u32x8())) }
32658}
32659
32660/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32661///
32662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
32663#[inline]
32664#[target_feature(enable = "avx512f,avx512vl")]
32665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32666#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32667#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32668pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32669 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32670}
32671
32672/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32673///
32674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
32675#[inline]
32676#[target_feature(enable = "avx512f,avx512vl")]
32677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32678#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32680pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32681 unsafe { simd_bitmask::<u32x4, _>(simd_ne(x:a.as_u32x4(), y:b.as_u32x4())) }
32682}
32683
32684/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32685///
32686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
32687#[inline]
32688#[target_feature(enable = "avx512f,avx512vl")]
32689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32690#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32692pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32693 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32694}
32695
32696/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32697///
32698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
32699#[inline]
32700#[target_feature(enable = "avx512f")]
32701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32702#[rustc_legacy_const_generics(2)]
32703#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32705pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32706 a: __m512i,
32707 b: __m512i,
32708) -> __mmask16 {
32709 unsafe {
32710 static_assert_uimm_bits!(IMM3, 3);
32711 let a: Simd = a.as_u32x16();
32712 let b: Simd = b.as_u32x16();
32713 let r: Simd = match IMM3 {
32714 0 => simd_eq(x:a, y:b),
32715 1 => simd_lt(x:a, y:b),
32716 2 => simd_le(x:a, y:b),
32717 3 => i32x16::ZERO,
32718 4 => simd_ne(x:a, y:b),
32719 5 => simd_ge(x:a, y:b),
32720 6 => simd_gt(x:a, y:b),
32721 _ => i32x16::splat(-1),
32722 };
32723 simd_bitmask(r)
32724 }
32725}
32726
32727/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32728///
32729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
32730#[inline]
32731#[target_feature(enable = "avx512f")]
32732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32733#[rustc_legacy_const_generics(3)]
32734#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32736pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32737 k1: __mmask16,
32738 a: __m512i,
32739 b: __m512i,
32740) -> __mmask16 {
32741 unsafe {
32742 static_assert_uimm_bits!(IMM3, 3);
32743 let a: Simd = a.as_u32x16();
32744 let b: Simd = b.as_u32x16();
32745 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
32746 let r: Simd = match IMM3 {
32747 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32748 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32749 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32750 3 => i32x16::ZERO,
32751 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32752 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32753 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32754 _ => k1,
32755 };
32756 simd_bitmask(r)
32757 }
32758}
32759
32760/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32761///
32762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
32763#[inline]
32764#[target_feature(enable = "avx512f,avx512vl")]
32765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32766#[rustc_legacy_const_generics(2)]
32767#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32769pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32770 a: __m256i,
32771 b: __m256i,
32772) -> __mmask8 {
32773 unsafe {
32774 static_assert_uimm_bits!(IMM3, 3);
32775 let a: Simd = a.as_u32x8();
32776 let b: Simd = b.as_u32x8();
32777 let r: Simd = match IMM3 {
32778 0 => simd_eq(x:a, y:b),
32779 1 => simd_lt(x:a, y:b),
32780 2 => simd_le(x:a, y:b),
32781 3 => i32x8::ZERO,
32782 4 => simd_ne(x:a, y:b),
32783 5 => simd_ge(x:a, y:b),
32784 6 => simd_gt(x:a, y:b),
32785 _ => i32x8::splat(-1),
32786 };
32787 simd_bitmask(r)
32788 }
32789}
32790
32791/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32792///
32793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
32794#[inline]
32795#[target_feature(enable = "avx512f,avx512vl")]
32796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32797#[rustc_legacy_const_generics(3)]
32798#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32800pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32801 k1: __mmask8,
32802 a: __m256i,
32803 b: __m256i,
32804) -> __mmask8 {
32805 unsafe {
32806 static_assert_uimm_bits!(IMM3, 3);
32807 let a: Simd = a.as_u32x8();
32808 let b: Simd = b.as_u32x8();
32809 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x8::splat(-1), no:i32x8::ZERO);
32810 let r: Simd = match IMM3 {
32811 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32812 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32813 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32814 3 => i32x8::ZERO,
32815 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32816 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32817 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32818 _ => k1,
32819 };
32820 simd_bitmask(r)
32821 }
32822}
32823
32824/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32825///
32826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
32827#[inline]
32828#[target_feature(enable = "avx512f,avx512vl")]
32829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32830#[rustc_legacy_const_generics(2)]
32831#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32833pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32834 unsafe {
32835 static_assert_uimm_bits!(IMM3, 3);
32836 let a: Simd = a.as_u32x4();
32837 let b: Simd = b.as_u32x4();
32838 let r: Simd = match IMM3 {
32839 0 => simd_eq(x:a, y:b),
32840 1 => simd_lt(x:a, y:b),
32841 2 => simd_le(x:a, y:b),
32842 3 => i32x4::ZERO,
32843 4 => simd_ne(x:a, y:b),
32844 5 => simd_ge(x:a, y:b),
32845 6 => simd_gt(x:a, y:b),
32846 _ => i32x4::splat(-1),
32847 };
32848 simd_bitmask(r)
32849 }
32850}
32851
32852/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32853///
32854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
32855#[inline]
32856#[target_feature(enable = "avx512f,avx512vl")]
32857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32858#[rustc_legacy_const_generics(3)]
32859#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32861pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32862 k1: __mmask8,
32863 a: __m128i,
32864 b: __m128i,
32865) -> __mmask8 {
32866 unsafe {
32867 static_assert_uimm_bits!(IMM3, 3);
32868 let a: Simd = a.as_u32x4();
32869 let b: Simd = b.as_u32x4();
32870 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x4::splat(-1), no:i32x4::ZERO);
32871 let r: Simd = match IMM3 {
32872 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
32873 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
32874 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
32875 3 => i32x4::ZERO,
32876 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
32877 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
32878 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
32879 _ => k1,
32880 };
32881 simd_bitmask(r)
32882 }
32883}
32884
32885/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32886///
32887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
32888#[inline]
32889#[target_feature(enable = "avx512f")]
32890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32891#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32893pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32894 unsafe { simd_bitmask::<i32x16, _>(simd_lt(x:a.as_i32x16(), y:b.as_i32x16())) }
32895}
32896
32897/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32898///
32899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
32900#[inline]
32901#[target_feature(enable = "avx512f")]
32902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32903#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32905pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32906 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32907}
32908
32909/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32917pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32918 unsafe { simd_bitmask::<i32x8, _>(simd_lt(x:a.as_i32x8(), y:b.as_i32x8())) }
32919}
32920
32921/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32922///
32923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
32924#[inline]
32925#[target_feature(enable = "avx512f,avx512vl")]
32926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32927#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32929pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32930 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32931}
32932
32933/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
32936#[inline]
32937#[target_feature(enable = "avx512f,avx512vl")]
32938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32941pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32942 unsafe { simd_bitmask::<i32x4, _>(simd_lt(x:a.as_i32x4(), y:b.as_i32x4())) }
32943}
32944
32945/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32946///
32947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
32948#[inline]
32949#[target_feature(enable = "avx512f,avx512vl")]
32950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32951#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32953pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32954 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32955}
32956
32957/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32958///
32959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
32960#[inline]
32961#[target_feature(enable = "avx512f")]
32962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32965pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32966 unsafe { simd_bitmask::<i32x16, _>(simd_gt(x:a.as_i32x16(), y:b.as_i32x16())) }
32967}
32968
32969/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32970///
32971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
32972#[inline]
32973#[target_feature(enable = "avx512f")]
32974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32975#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32977pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32978 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32979}
32980
32981/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32982///
32983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
32984#[inline]
32985#[target_feature(enable = "avx512f,avx512vl")]
32986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32989pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32990 unsafe { simd_bitmask::<i32x8, _>(simd_gt(x:a.as_i32x8(), y:b.as_i32x8())) }
32991}
32992
32993/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32994///
32995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
32996#[inline]
32997#[target_feature(enable = "avx512f,avx512vl")]
32998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32999#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33001pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33002 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33003}
33004
33005/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
33006///
33007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
33008#[inline]
33009#[target_feature(enable = "avx512f,avx512vl")]
33010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33013pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33014 unsafe { simd_bitmask::<i32x4, _>(simd_gt(x:a.as_i32x4(), y:b.as_i32x4())) }
33015}
33016
33017/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33018///
33019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
33020#[inline]
33021#[target_feature(enable = "avx512f,avx512vl")]
33022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33023#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33025pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33026 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33027}
33028
33029/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33030///
33031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
33032#[inline]
33033#[target_feature(enable = "avx512f")]
33034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33035#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33037pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33038 unsafe { simd_bitmask::<i32x16, _>(simd_le(x:a.as_i32x16(), y:b.as_i32x16())) }
33039}
33040
33041/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33042///
33043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
33044#[inline]
33045#[target_feature(enable = "avx512f")]
33046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33049pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33050 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33051}
33052
33053/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33054///
33055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
33056#[inline]
33057#[target_feature(enable = "avx512f,avx512vl")]
33058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33059#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33061pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33062 unsafe { simd_bitmask::<i32x8, _>(simd_le(x:a.as_i32x8(), y:b.as_i32x8())) }
33063}
33064
33065/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33066///
33067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
33068#[inline]
33069#[target_feature(enable = "avx512f,avx512vl")]
33070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33071#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33073pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33074 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33075}
33076
33077/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33078///
33079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
33080#[inline]
33081#[target_feature(enable = "avx512f,avx512vl")]
33082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33083#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33085pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33086 unsafe { simd_bitmask::<i32x4, _>(simd_le(x:a.as_i32x4(), y:b.as_i32x4())) }
33087}
33088
33089/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33090///
33091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
33092#[inline]
33093#[target_feature(enable = "avx512f,avx512vl")]
33094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33095#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33097pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33098 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33099}
33100
33101/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33102///
33103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
33104#[inline]
33105#[target_feature(enable = "avx512f")]
33106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33107#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33109pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33110 unsafe { simd_bitmask::<i32x16, _>(simd_ge(x:a.as_i32x16(), y:b.as_i32x16())) }
33111}
33112
33113/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33114///
33115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
33116#[inline]
33117#[target_feature(enable = "avx512f")]
33118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33121pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33122 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33123}
33124
33125/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33126///
33127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
33128#[inline]
33129#[target_feature(enable = "avx512f,avx512vl")]
33130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33131#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33133pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33134 unsafe { simd_bitmask::<i32x8, _>(simd_ge(x:a.as_i32x8(), y:b.as_i32x8())) }
33135}
33136
33137/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33145pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33146 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33147}
33148
33149/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl")]
33154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33157pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33158 unsafe { simd_bitmask::<i32x4, _>(simd_ge(x:a.as_i32x4(), y:b.as_i32x4())) }
33159}
33160
33161/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33162///
33163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
33164#[inline]
33165#[target_feature(enable = "avx512f,avx512vl")]
33166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33167#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33169pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33170 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33171}
33172
33173/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
33176#[inline]
33177#[target_feature(enable = "avx512f")]
33178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33181pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33182 unsafe { simd_bitmask::<i32x16, _>(simd_eq(x:a.as_i32x16(), y:b.as_i32x16())) }
33183}
33184
33185/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33186///
33187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
33188#[inline]
33189#[target_feature(enable = "avx512f")]
33190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33191#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33193pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33194 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33195}
33196
33197/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33198///
33199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
33200#[inline]
33201#[target_feature(enable = "avx512f,avx512vl")]
33202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33205pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33206 unsafe { simd_bitmask::<i32x8, _>(simd_eq(x:a.as_i32x8(), y:b.as_i32x8())) }
33207}
33208
33209/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33210///
33211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
33212#[inline]
33213#[target_feature(enable = "avx512f,avx512vl")]
33214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33215#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33217pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33218 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33219}
33220
33221/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33222///
33223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
33224#[inline]
33225#[target_feature(enable = "avx512f,avx512vl")]
33226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33229pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33230 unsafe { simd_bitmask::<i32x4, _>(simd_eq(x:a.as_i32x4(), y:b.as_i32x4())) }
33231}
33232
33233/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33234///
33235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
33236#[inline]
33237#[target_feature(enable = "avx512f,avx512vl")]
33238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33239#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33241pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33242 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33243}
33244
33245/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33253pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33254 unsafe { simd_bitmask::<i32x16, _>(simd_ne(x:a.as_i32x16(), y:b.as_i32x16())) }
33255}
33256
33257/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33258///
33259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
33260#[inline]
33261#[target_feature(enable = "avx512f")]
33262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33263#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33265pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33266 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33267}
33268
33269/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
33272#[inline]
33273#[target_feature(enable = "avx512f,avx512vl")]
33274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33277pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33278 unsafe { simd_bitmask::<i32x8, _>(simd_ne(x:a.as_i32x8(), y:b.as_i32x8())) }
33279}
33280
33281/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33282///
33283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
33284#[inline]
33285#[target_feature(enable = "avx512f,avx512vl")]
33286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33289pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33290 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33291}
33292
33293/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33294///
33295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
33296#[inline]
33297#[target_feature(enable = "avx512f,avx512vl")]
33298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33299#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33301pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33302 unsafe { simd_bitmask::<i32x4, _>(simd_ne(x:a.as_i32x4(), y:b.as_i32x4())) }
33303}
33304
33305/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33306///
33307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
33308#[inline]
33309#[target_feature(enable = "avx512f,avx512vl")]
33310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33313pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33314 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33315}
33316
33317/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33318///
33319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
33320#[inline]
33321#[target_feature(enable = "avx512f")]
33322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33323#[rustc_legacy_const_generics(2)]
33324#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33325#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33326pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33327 a: __m512i,
33328 b: __m512i,
33329) -> __mmask16 {
33330 unsafe {
33331 static_assert_uimm_bits!(IMM3, 3);
33332 let a: Simd = a.as_i32x16();
33333 let b: Simd = b.as_i32x16();
33334 let r: Simd = match IMM3 {
33335 0 => simd_eq(x:a, y:b),
33336 1 => simd_lt(x:a, y:b),
33337 2 => simd_le(x:a, y:b),
33338 3 => i32x16::ZERO,
33339 4 => simd_ne(x:a, y:b),
33340 5 => simd_ge(x:a, y:b),
33341 6 => simd_gt(x:a, y:b),
33342 _ => i32x16::splat(-1),
33343 };
33344 simd_bitmask(r)
33345 }
33346}
33347
33348/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33349///
33350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
33351#[inline]
33352#[target_feature(enable = "avx512f")]
33353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33354#[rustc_legacy_const_generics(3)]
33355#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33357pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33358 k1: __mmask16,
33359 a: __m512i,
33360 b: __m512i,
33361) -> __mmask16 {
33362 unsafe {
33363 static_assert_uimm_bits!(IMM3, 3);
33364 let a: Simd = a.as_i32x16();
33365 let b: Simd = b.as_i32x16();
33366 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
33367 let r: Simd = match IMM3 {
33368 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33369 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33370 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33371 3 => i32x16::ZERO,
33372 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33373 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33374 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33375 _ => k1,
33376 };
33377 simd_bitmask(r)
33378 }
33379}
33380
33381/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33382///
33383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_cmp_epi32_mask&expand=695)
33384#[inline]
33385#[target_feature(enable = "avx512f,avx512vl")]
33386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33387#[rustc_legacy_const_generics(2)]
33388#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33390pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33391 a: __m256i,
33392 b: __m256i,
33393) -> __mmask8 {
33394 unsafe {
33395 static_assert_uimm_bits!(IMM3, 3);
33396 let a: Simd = a.as_i32x8();
33397 let b: Simd = b.as_i32x8();
33398 let r: Simd = match IMM3 {
33399 0 => simd_eq(x:a, y:b),
33400 1 => simd_lt(x:a, y:b),
33401 2 => simd_le(x:a, y:b),
33402 3 => i32x8::ZERO,
33403 4 => simd_ne(x:a, y:b),
33404 5 => simd_ge(x:a, y:b),
33405 6 => simd_gt(x:a, y:b),
33406 _ => i32x8::splat(-1),
33407 };
33408 simd_bitmask(r)
33409 }
33410}
33411
33412/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33413///
33414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
33415#[inline]
33416#[target_feature(enable = "avx512f,avx512vl")]
33417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33418#[rustc_legacy_const_generics(3)]
33419#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33421pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33422 k1: __mmask8,
33423 a: __m256i,
33424 b: __m256i,
33425) -> __mmask8 {
33426 unsafe {
33427 static_assert_uimm_bits!(IMM3, 3);
33428 let a: Simd = a.as_i32x8();
33429 let b: Simd = b.as_i32x8();
33430 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x8::splat(-1), no:i32x8::ZERO);
33431 let r: Simd = match IMM3 {
33432 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33433 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33434 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33435 3 => i32x8::ZERO,
33436 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33437 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33438 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33439 _ => k1,
33440 };
33441 simd_bitmask(r)
33442 }
33443}
33444
33445/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33446///
33447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
33448#[inline]
33449#[target_feature(enable = "avx512f,avx512vl")]
33450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33451#[rustc_legacy_const_generics(2)]
33452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33453#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33454pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33455 unsafe {
33456 static_assert_uimm_bits!(IMM3, 3);
33457 let a: Simd = a.as_i32x4();
33458 let b: Simd = b.as_i32x4();
33459 let r: Simd = match IMM3 {
33460 0 => simd_eq(x:a, y:b),
33461 1 => simd_lt(x:a, y:b),
33462 2 => simd_le(x:a, y:b),
33463 3 => i32x4::ZERO,
33464 4 => simd_ne(x:a, y:b),
33465 5 => simd_ge(x:a, y:b),
33466 6 => simd_gt(x:a, y:b),
33467 _ => i32x4::splat(-1),
33468 };
33469 simd_bitmask(r)
33470 }
33471}
33472
33473/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33474///
33475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
33476#[inline]
33477#[target_feature(enable = "avx512f,avx512vl")]
33478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33479#[rustc_legacy_const_generics(3)]
33480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33482pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33483 k1: __mmask8,
33484 a: __m128i,
33485 b: __m128i,
33486) -> __mmask8 {
33487 unsafe {
33488 static_assert_uimm_bits!(IMM3, 3);
33489 let a: Simd = a.as_i32x4();
33490 let b: Simd = b.as_i32x4();
33491 let k1: Simd = simd_select_bitmask(m:k1, yes:i32x4::splat(-1), no:i32x4::ZERO);
33492 let r: Simd = match IMM3 {
33493 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33494 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33495 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33496 3 => i32x4::ZERO,
33497 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33498 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33499 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33500 _ => k1,
33501 };
33502 simd_bitmask(r)
33503 }
33504}
33505
33506/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33507///
33508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
33509#[inline]
33510#[target_feature(enable = "avx512f")]
33511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33514pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33515 unsafe { simd_bitmask::<__m512i, _>(simd_lt(x:a.as_u64x8(), y:b.as_u64x8())) }
33516}
33517
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate; `k1`
    // zeroes out result bits whose corresponding mask bit is clear.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33529
33530/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33531///
33532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
33533#[inline]
33534#[target_feature(enable = "avx512f,avx512vl")]
33535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33536#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33538pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33539 unsafe { simd_bitmask::<__m256i, _>(simd_lt(x:a.as_u64x4(), y:b.as_u64x4())) }
33540}
33541
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33553
33554/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33555///
33556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
33557#[inline]
33558#[target_feature(enable = "avx512f,avx512vl")]
33559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33562pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33563 unsafe { simd_bitmask::<__m128i, _>(simd_lt(x:a.as_u64x2(), y:b.as_u64x2())) }
33564}
33565
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LT predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
33577
33578/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33579///
33580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
33581#[inline]
33582#[target_feature(enable = "avx512f")]
33583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33586pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33587 unsafe { simd_bitmask::<__m512i, _>(simd_gt(x:a.as_u64x8(), y:b.as_u64x8())) }
33588}
33589
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33601
33602/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33610pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33611 unsafe { simd_bitmask::<__m256i, _>(simd_gt(x:a.as_u64x4(), y:b.as_u64x4())) }
33612}
33613
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33625
33626/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33627///
33628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
33629#[inline]
33630#[target_feature(enable = "avx512f,avx512vl")]
33631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33634pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33635 unsafe { simd_bitmask::<__m128i, _>(simd_gt(x:a.as_u64x2(), y:b.as_u64x2())) }
33636}
33637
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is expressed as NLE (not-less-or-equal) in the predicate encoding.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
33649
33650/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33651///
33652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
33653#[inline]
33654#[target_feature(enable = "avx512f")]
33655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33658pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33659 unsafe { simd_bitmask::<__m512i, _>(simd_le(x:a.as_u64x8(), y:b.as_u64x8())) }
33660}
33661
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33673
33674/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33675///
33676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
33677#[inline]
33678#[target_feature(enable = "avx512f,avx512vl")]
33679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33680#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33682pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33683 unsafe { simd_bitmask::<__m256i, _>(simd_le(x:a.as_u64x4(), y:b.as_u64x4())) }
33684}
33685
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33697
33698/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
33701#[inline]
33702#[target_feature(enable = "avx512f,avx512vl")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33706pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33707 unsafe { simd_bitmask::<__m128i, _>(simd_le(x:a.as_u64x2(), y:b.as_u64x2())) }
33708}
33709
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the LE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
33721
33722/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33723///
33724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
33725#[inline]
33726#[target_feature(enable = "avx512f")]
33727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33730pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33731 unsafe { simd_bitmask::<__m512i, _>(simd_ge(x:a.as_u64x8(), y:b.as_u64x8())) }
33732}
33733
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33745
33746/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33747///
33748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
33749#[inline]
33750#[target_feature(enable = "avx512f,avx512vl")]
33751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33754pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33755 unsafe { simd_bitmask::<__m256i, _>(simd_ge(x:a.as_u64x4(), y:b.as_u64x4())) }
33756}
33757
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33769
33770/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33771///
33772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
33773#[inline]
33774#[target_feature(enable = "avx512f,avx512vl")]
33775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33776#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33777#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33778pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33779 unsafe { simd_bitmask::<__m128i, _>(simd_ge(x:a.as_u64x2(), y:b.as_u64x2())) }
33780}
33781
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-or-equal is expressed as NLT (not-less-than) in the predicate encoding.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
33793
33794/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33795///
33796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
33797#[inline]
33798#[target_feature(enable = "avx512f")]
33799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33800#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33802pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33803 unsafe { simd_bitmask::<__m512i, _>(simd_eq(x:a.as_u64x8(), y:b.as_u64x8())) }
33804}
33805
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33817
33818/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33819///
33820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
33821#[inline]
33822#[target_feature(enable = "avx512f,avx512vl")]
33823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33826pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33827 unsafe { simd_bitmask::<__m256i, _>(simd_eq(x:a.as_u64x4(), y:b.as_u64x4())) }
33828}
33829
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33841
33842/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33843///
33844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
33845#[inline]
33846#[target_feature(enable = "avx512f,avx512vl")]
33847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33848#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33850pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33851 unsafe { simd_bitmask::<__m128i, _>(simd_eq(x:a.as_u64x2(), y:b.as_u64x2())) }
33852}
33853
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the EQ predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
33865
33866/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33872#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33874pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33875 unsafe { simd_bitmask::<__m512i, _>(simd_ne(x:a.as_u64x8(), y:b.as_u64x8())) }
33876}
33877
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33889
33890/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
33893#[inline]
33894#[target_feature(enable = "avx512f,avx512vl")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33898pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33899 unsafe { simd_bitmask::<__m256i, _>(simd_ne(x:a.as_u64x4(), y:b.as_u64x4())) }
33900}
33901
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33913
33914/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33915///
33916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
33917#[inline]
33918#[target_feature(enable = "avx512f,avx512vl")]
33919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33920#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33922pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33923 unsafe { simd_bitmask::<__m128i, _>(simd_ne(x:a.as_u64x2(), y:b.as_u64x2())) }
33924}
33925
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with the NE predicate.
    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
33937
33938/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33939///
33940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
33941#[inline]
33942#[target_feature(enable = "avx512f")]
33943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33944#[rustc_legacy_const_generics(2)]
33945#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33947pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33948 a: __m512i,
33949 b: __m512i,
33950) -> __mmask8 {
33951 unsafe {
33952 static_assert_uimm_bits!(IMM3, 3);
33953 let a: Simd = a.as_u64x8();
33954 let b: Simd = b.as_u64x8();
33955 let r: Simd = match IMM3 {
33956 0 => simd_eq(x:a, y:b),
33957 1 => simd_lt(x:a, y:b),
33958 2 => simd_le(x:a, y:b),
33959 3 => i64x8::ZERO,
33960 4 => simd_ne(x:a, y:b),
33961 5 => simd_ge(x:a, y:b),
33962 6 => simd_gt(x:a, y:b),
33963 _ => i64x8::splat(-1),
33964 };
33965 simd_bitmask(r)
33966 }
33967}
33968
33969/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33970///
33971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
33972#[inline]
33973#[target_feature(enable = "avx512f")]
33974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33975#[rustc_legacy_const_generics(3)]
33976#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33978pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33979 k1: __mmask8,
33980 a: __m512i,
33981 b: __m512i,
33982) -> __mmask8 {
33983 unsafe {
33984 static_assert_uimm_bits!(IMM3, 3);
33985 let a: Simd = a.as_u64x8();
33986 let b: Simd = b.as_u64x8();
33987 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x8::splat(-1), no:i64x8::ZERO);
33988 let r: Simd = match IMM3 {
33989 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
33990 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
33991 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
33992 3 => i64x8::ZERO,
33993 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
33994 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
33995 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
33996 _ => k1,
33997 };
33998 simd_bitmask(r)
33999 }
34000}
34001
34002/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34003///
34004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
34005#[inline]
34006#[target_feature(enable = "avx512f,avx512vl")]
34007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34008#[rustc_legacy_const_generics(2)]
34009#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34011pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34012 a: __m256i,
34013 b: __m256i,
34014) -> __mmask8 {
34015 unsafe {
34016 static_assert_uimm_bits!(IMM3, 3);
34017 let a: Simd = a.as_u64x4();
34018 let b: Simd = b.as_u64x4();
34019 let r: Simd = match IMM3 {
34020 0 => simd_eq(x:a, y:b),
34021 1 => simd_lt(x:a, y:b),
34022 2 => simd_le(x:a, y:b),
34023 3 => i64x4::ZERO,
34024 4 => simd_ne(x:a, y:b),
34025 5 => simd_ge(x:a, y:b),
34026 6 => simd_gt(x:a, y:b),
34027 _ => i64x4::splat(-1),
34028 };
34029 simd_bitmask(r)
34030 }
34031}
34032
34033/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[rustc_legacy_const_generics(3)]
34040#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34042pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34043 k1: __mmask8,
34044 a: __m256i,
34045 b: __m256i,
34046) -> __mmask8 {
34047 unsafe {
34048 static_assert_uimm_bits!(IMM3, 3);
34049 let a: Simd = a.as_u64x4();
34050 let b: Simd = b.as_u64x4();
34051 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x4::splat(-1), no:i64x4::ZERO);
34052 let r: Simd = match IMM3 {
34053 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34054 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34055 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34056 3 => i64x4::ZERO,
34057 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34058 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34059 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34060 _ => k1,
34061 };
34062 simd_bitmask(r)
34063 }
34064}
34065
34066/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[rustc_legacy_const_generics(2)]
34073#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34075pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
34076 unsafe {
34077 static_assert_uimm_bits!(IMM3, 3);
34078 let a: Simd = a.as_u64x2();
34079 let b: Simd = b.as_u64x2();
34080 let r: Simd = match IMM3 {
34081 0 => simd_eq(x:a, y:b),
34082 1 => simd_lt(x:a, y:b),
34083 2 => simd_le(x:a, y:b),
34084 3 => i64x2::ZERO,
34085 4 => simd_ne(x:a, y:b),
34086 5 => simd_ge(x:a, y:b),
34087 6 => simd_gt(x:a, y:b),
34088 _ => i64x2::splat(-1),
34089 };
34090 simd_bitmask(r)
34091 }
34092}
34093
34094/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34095///
34096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
34097#[inline]
34098#[target_feature(enable = "avx512f,avx512vl")]
34099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34100#[rustc_legacy_const_generics(3)]
34101#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34103pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34104 k1: __mmask8,
34105 a: __m128i,
34106 b: __m128i,
34107) -> __mmask8 {
34108 unsafe {
34109 static_assert_uimm_bits!(IMM3, 3);
34110 let a: Simd = a.as_u64x2();
34111 let b: Simd = b.as_u64x2();
34112 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x2::splat(-1), no:i64x2::ZERO);
34113 let r: Simd = match IMM3 {
34114 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34115 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34116 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34117 3 => i64x2::ZERO,
34118 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34119 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34120 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34121 _ => k1,
34122 };
34123 simd_bitmask(r)
34124 }
34125}
34126
34127/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34128///
34129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
34130#[inline]
34131#[target_feature(enable = "avx512f")]
34132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34134#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34135pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34136 unsafe { simd_bitmask::<__m512i, _>(simd_lt(x:a.as_i64x8(), y:b.as_i64x8())) }
34137}
34138
34139/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34140///
34141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
34142#[inline]
34143#[target_feature(enable = "avx512f")]
34144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34145#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34147pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34148 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34149}
34150
34151/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34152///
34153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
34154#[inline]
34155#[target_feature(enable = "avx512f,avx512vl")]
34156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34159pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34160 unsafe { simd_bitmask::<__m256i, _>(simd_lt(x:a.as_i64x4(), y:b.as_i64x4())) }
34161}
34162
34163/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
34166#[inline]
34167#[target_feature(enable = "avx512f,avx512vl")]
34168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34169#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34171pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34172 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34173}
34174
34175/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34176///
34177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
34178#[inline]
34179#[target_feature(enable = "avx512f,avx512vl")]
34180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34181#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34183pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34184 unsafe { simd_bitmask::<__m128i, _>(simd_lt(x:a.as_i64x2(), y:b.as_i64x2())) }
34185}
34186
34187/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34188///
34189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
34190#[inline]
34191#[target_feature(enable = "avx512f,avx512vl")]
34192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34193#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34194#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34195pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34196 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34197}
34198
34199/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34200///
34201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
34202#[inline]
34203#[target_feature(enable = "avx512f")]
34204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34205#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34207pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34208 unsafe { simd_bitmask::<__m512i, _>(simd_gt(x:a.as_i64x8(), y:b.as_i64x8())) }
34209}
34210
34211/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34212///
34213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
34214#[inline]
34215#[target_feature(enable = "avx512f")]
34216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34217#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34219pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34220 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34221}
34222
34223/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34224///
34225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
34226#[inline]
34227#[target_feature(enable = "avx512f,avx512vl")]
34228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34231pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34232 unsafe { simd_bitmask::<__m256i, _>(simd_gt(x:a.as_i64x4(), y:b.as_i64x4())) }
34233}
34234
34235/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34236///
34237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
34238#[inline]
34239#[target_feature(enable = "avx512f,avx512vl")]
34240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34241#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34243pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34244 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34245}
34246
34247/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34248///
34249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
34250#[inline]
34251#[target_feature(enable = "avx512f,avx512vl")]
34252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34253#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34255pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34256 unsafe { simd_bitmask::<__m128i, _>(simd_gt(x:a.as_i64x2(), y:b.as_i64x2())) }
34257}
34258
34259/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34260///
34261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
34262#[inline]
34263#[target_feature(enable = "avx512f,avx512vl")]
34264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34267pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34268 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34269}
34270
34271/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34272///
34273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
34274#[inline]
34275#[target_feature(enable = "avx512f")]
34276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34277#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34278#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34279pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34280 unsafe { simd_bitmask::<__m512i, _>(simd_le(x:a.as_i64x8(), y:b.as_i64x8())) }
34281}
34282
34283/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34284///
34285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
34286#[inline]
34287#[target_feature(enable = "avx512f")]
34288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34291pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34292 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34293}
34294
34295/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
34298#[inline]
34299#[target_feature(enable = "avx512f,avx512vl")]
34300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34301#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34303pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34304 unsafe { simd_bitmask::<__m256i, _>(simd_le(x:a.as_i64x4(), y:b.as_i64x4())) }
34305}
34306
34307/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34308///
34309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
34310#[inline]
34311#[target_feature(enable = "avx512f,avx512vl")]
34312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34313#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34315pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34316 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34317}
34318
34319/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34320///
34321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
34322#[inline]
34323#[target_feature(enable = "avx512f,avx512vl")]
34324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34325#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34327pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34328 unsafe { simd_bitmask::<__m128i, _>(simd_le(x:a.as_i64x2(), y:b.as_i64x2())) }
34329}
34330
34331/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34332///
34333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
34334#[inline]
34335#[target_feature(enable = "avx512f,avx512vl")]
34336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34337#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34339pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34340 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34341}
34342
34343/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34344///
34345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
34346#[inline]
34347#[target_feature(enable = "avx512f")]
34348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34349#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34350#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34351pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34352 unsafe { simd_bitmask::<__m512i, _>(simd_ge(x:a.as_i64x8(), y:b.as_i64x8())) }
34353}
34354
34355/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34356///
34357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
34358#[inline]
34359#[target_feature(enable = "avx512f")]
34360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34361#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34363pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34364 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34365}
34366
34367/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34368///
34369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
34370#[inline]
34371#[target_feature(enable = "avx512f,avx512vl")]
34372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34373#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34375pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34376 unsafe { simd_bitmask::<__m256i, _>(simd_ge(x:a.as_i64x4(), y:b.as_i64x4())) }
34377}
34378
34379/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34380///
34381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
34382#[inline]
34383#[target_feature(enable = "avx512f,avx512vl")]
34384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34385#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34387pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34388 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34389}
34390
34391/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34392///
34393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
34394#[inline]
34395#[target_feature(enable = "avx512f,avx512vl")]
34396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34399pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34400 unsafe { simd_bitmask::<__m128i, _>(simd_ge(x:a.as_i64x2(), y:b.as_i64x2())) }
34401}
34402
34403/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34404///
34405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
34406#[inline]
34407#[target_feature(enable = "avx512f,avx512vl")]
34408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34409#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34411pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34412 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34413}
34414
34415/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34416///
34417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
34418#[inline]
34419#[target_feature(enable = "avx512f")]
34420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34423pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34424 unsafe { simd_bitmask::<__m512i, _>(simd_eq(x:a.as_i64x8(), y:b.as_i64x8())) }
34425}
34426
34427/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34433#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34435pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34436 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34437}
34438
34439/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34440///
34441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
34442#[inline]
34443#[target_feature(enable = "avx512f,avx512vl")]
34444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34445#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34447pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34448 unsafe { simd_bitmask::<__m256i, _>(simd_eq(x:a.as_i64x4(), y:b.as_i64x4())) }
34449}
34450
34451/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
34454#[inline]
34455#[target_feature(enable = "avx512f,avx512vl")]
34456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34457#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34458#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34459pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34460 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34461}
34462
34463/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34464///
34465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
34466#[inline]
34467#[target_feature(enable = "avx512f,avx512vl")]
34468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34469#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34471pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34472 unsafe { simd_bitmask::<__m128i, _>(simd_eq(x:a.as_i64x2(), y:b.as_i64x2())) }
34473}
34474
34475/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34476///
34477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
34478#[inline]
34479#[target_feature(enable = "avx512f,avx512vl")]
34480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34481#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34483pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34484 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34485}
34486
34487/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34488///
34489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
34490#[inline]
34491#[target_feature(enable = "avx512f")]
34492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34493#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34495pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34496 unsafe { simd_bitmask::<__m512i, _>(simd_ne(x:a.as_i64x8(), y:b.as_i64x8())) }
34497}
34498
34499/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34500///
34501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
34502#[inline]
34503#[target_feature(enable = "avx512f")]
34504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34507pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34508 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34509}
34510
34511/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34512///
34513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
34514#[inline]
34515#[target_feature(enable = "avx512f,avx512vl")]
34516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34517#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34519pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34520 unsafe { simd_bitmask::<__m256i, _>(simd_ne(x:a.as_i64x4(), y:b.as_i64x4())) }
34521}
34522
34523/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
34526#[inline]
34527#[target_feature(enable = "avx512f,avx512vl")]
34528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34529#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34530#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34531pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34532 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34533}
34534
34535/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34536///
34537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
34538#[inline]
34539#[target_feature(enable = "avx512f,avx512vl")]
34540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34541#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34542#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34543pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34544 unsafe { simd_bitmask::<__m128i, _>(simd_ne(x:a.as_i64x2(), y:b.as_i64x2())) }
34545}
34546
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the generic masked compare with the not-equal predicate.
    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
34558
34559/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34560///
34561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
34562#[inline]
34563#[target_feature(enable = "avx512f")]
34564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34565#[rustc_legacy_const_generics(2)]
34566#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34568pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34569 a: __m512i,
34570 b: __m512i,
34571) -> __mmask8 {
34572 unsafe {
34573 static_assert_uimm_bits!(IMM3, 3);
34574 let a: Simd = a.as_i64x8();
34575 let b: Simd = b.as_i64x8();
34576 let r: Simd = match IMM3 {
34577 0 => simd_eq(x:a, y:b),
34578 1 => simd_lt(x:a, y:b),
34579 2 => simd_le(x:a, y:b),
34580 3 => i64x8::ZERO,
34581 4 => simd_ne(x:a, y:b),
34582 5 => simd_ge(x:a, y:b),
34583 6 => simd_gt(x:a, y:b),
34584 _ => i64x8::splat(-1),
34585 };
34586 simd_bitmask(r)
34587 }
34588}
34589
34590/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34591///
34592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
34593#[inline]
34594#[target_feature(enable = "avx512f")]
34595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34596#[rustc_legacy_const_generics(3)]
34597#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34599pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34600 k1: __mmask8,
34601 a: __m512i,
34602 b: __m512i,
34603) -> __mmask8 {
34604 unsafe {
34605 static_assert_uimm_bits!(IMM3, 3);
34606 let a: Simd = a.as_i64x8();
34607 let b: Simd = b.as_i64x8();
34608 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x8::splat(-1), no:i64x8::ZERO);
34609 let r: Simd = match IMM3 {
34610 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34611 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34612 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34613 3 => i64x8::ZERO,
34614 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34615 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34616 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34617 _ => k1,
34618 };
34619 simd_bitmask(r)
34620 }
34621}
34622
34623/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34624///
34625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
34626#[inline]
34627#[target_feature(enable = "avx512f,avx512vl")]
34628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34629#[rustc_legacy_const_generics(2)]
34630#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34632pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34633 a: __m256i,
34634 b: __m256i,
34635) -> __mmask8 {
34636 unsafe {
34637 static_assert_uimm_bits!(IMM3, 3);
34638 let a: Simd = a.as_i64x4();
34639 let b: Simd = b.as_i64x4();
34640 let r: Simd = match IMM3 {
34641 0 => simd_eq(x:a, y:b),
34642 1 => simd_lt(x:a, y:b),
34643 2 => simd_le(x:a, y:b),
34644 3 => i64x4::ZERO,
34645 4 => simd_ne(x:a, y:b),
34646 5 => simd_ge(x:a, y:b),
34647 6 => simd_gt(x:a, y:b),
34648 _ => i64x4::splat(-1),
34649 };
34650 simd_bitmask(r)
34651 }
34652}
34653
34654/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34655///
34656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
34657#[inline]
34658#[target_feature(enable = "avx512f,avx512vl")]
34659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34660#[rustc_legacy_const_generics(3)]
34661#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34663pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34664 k1: __mmask8,
34665 a: __m256i,
34666 b: __m256i,
34667) -> __mmask8 {
34668 unsafe {
34669 static_assert_uimm_bits!(IMM3, 3);
34670 let a: Simd = a.as_i64x4();
34671 let b: Simd = b.as_i64x4();
34672 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x4::splat(-1), no:i64x4::ZERO);
34673 let r: Simd = match IMM3 {
34674 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34675 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34676 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34677 3 => i64x4::ZERO,
34678 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34679 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34680 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34681 _ => k1,
34682 };
34683 simd_bitmask(r)
34684 }
34685}
34686
34687/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34688///
34689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
34690#[inline]
34691#[target_feature(enable = "avx512f,avx512vl")]
34692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34693#[rustc_legacy_const_generics(2)]
34694#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34696pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
34697 unsafe {
34698 static_assert_uimm_bits!(IMM3, 3);
34699 let a: Simd = a.as_i64x2();
34700 let b: Simd = b.as_i64x2();
34701 let r: Simd = match IMM3 {
34702 0 => simd_eq(x:a, y:b),
34703 1 => simd_lt(x:a, y:b),
34704 2 => simd_le(x:a, y:b),
34705 3 => i64x2::ZERO,
34706 4 => simd_ne(x:a, y:b),
34707 5 => simd_ge(x:a, y:b),
34708 6 => simd_gt(x:a, y:b),
34709 _ => i64x2::splat(-1),
34710 };
34711 simd_bitmask(r)
34712 }
34713}
34714
34715/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34716///
34717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
34718#[inline]
34719#[target_feature(enable = "avx512f,avx512vl")]
34720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34721#[rustc_legacy_const_generics(3)]
34722#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34724pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
34725 k1: __mmask8,
34726 a: __m128i,
34727 b: __m128i,
34728) -> __mmask8 {
34729 unsafe {
34730 static_assert_uimm_bits!(IMM3, 3);
34731 let a: Simd = a.as_i64x2();
34732 let b: Simd = b.as_i64x2();
34733 let k1: Simd = simd_select_bitmask(m:k1, yes:i64x2::splat(-1), no:i64x2::ZERO);
34734 let r: Simd = match IMM3 {
34735 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
34736 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
34737 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
34738 3 => i64x2::ZERO,
34739 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
34740 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
34741 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
34742 _ => k1,
34743 };
34744 simd_bitmask(r)
34745 }
34746}
34747
34748/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
34749///
34750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
34751#[inline]
34752#[target_feature(enable = "avx512f")]
34753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34755pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
34756 unsafe { simd_reduce_add_ordered(x:a.as_i32x16(), y:0) }
34757}
34758
34759/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
34760///
34761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
34762#[inline]
34763#[target_feature(enable = "avx512f")]
34764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34766pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
34767 unsafe { simd_reduce_add_ordered(x:simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), y:0) }
34768}
34769
34770/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
34771///
34772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
34773#[inline]
34774#[target_feature(enable = "avx512f")]
34775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34777pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
34778 unsafe { simd_reduce_add_ordered(x:a.as_i64x8(), y:0) }
34779}
34780
34781/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
34782///
34783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
34784#[inline]
34785#[target_feature(enable = "avx512f")]
34786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34788pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
34789 unsafe { simd_reduce_add_ordered(x:simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), y:0) }
34790}
34791
34792/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34799pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
34800 unsafe {
34801 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
34802 let a: __m256 = _mm256_add_ps(
34803 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
34804 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
34805 );
34806 let a: __m128 = _mm_add_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
34807 let a: __m128 = _mm_add_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
34808 simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
34809 }
34810}
34811
34812/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34813///
34814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
34815#[inline]
34816#[target_feature(enable = "avx512f")]
34817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34819pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
34820 unsafe { _mm512_reduce_add_ps(simd_select_bitmask(m:k, yes:a, no:_mm512_setzero_ps())) }
34821}
34822
34823/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
34824///
34825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
34826#[inline]
34827#[target_feature(enable = "avx512f")]
34828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34830pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
34831 unsafe {
34832 let a: __m256d = _mm256_add_pd(
34833 a:_mm512_extractf64x4_pd::<0>(a),
34834 b:_mm512_extractf64x4_pd::<1>(a),
34835 );
34836 let a: __m128d = _mm_add_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
34837 simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
34838 }
34839}
34840
34841/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
34842///
34843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
34844#[inline]
34845#[target_feature(enable = "avx512f")]
34846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34847#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34848pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
34849 unsafe { _mm512_reduce_add_pd(simd_select_bitmask(m:k, yes:a, no:_mm512_setzero_pd())) }
34850}
34851
34852/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
34853///
34854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
34855#[inline]
34856#[target_feature(enable = "avx512f")]
34857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34859pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
34860 unsafe { simd_reduce_mul_ordered(x:a.as_i32x16(), y:1) }
34861}
34862
34863/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34864///
34865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
34866#[inline]
34867#[target_feature(enable = "avx512f")]
34868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34869#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34870pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
34871 unsafe {
34872 simd_reduce_mul_ordered(
34873 x:simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
34874 y:1,
34875 )
34876 }
34877}
34878
34879/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
34880///
34881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
34882#[inline]
34883#[target_feature(enable = "avx512f")]
34884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34886pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
34887 unsafe { simd_reduce_mul_ordered(x:a.as_i64x8(), y:1) }
34888}
34889
34890/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
34891///
34892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
34893#[inline]
34894#[target_feature(enable = "avx512f")]
34895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34896#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34897pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
34898 unsafe {
34899 simd_reduce_mul_ordered(
34900 x:simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
34901 y:1,
34902 )
34903 }
34904}
34905
34906/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34907///
34908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
34909#[inline]
34910#[target_feature(enable = "avx512f")]
34911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34913pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
34914 unsafe {
34915 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
34916 let a: __m256 = _mm256_mul_ps(
34917 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
34918 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
34919 );
34920 let a: __m128 = _mm_mul_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
34921 let a: __m128 = _mm_mul_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
34922 simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
34923 }
34924}
34925
34926/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34927///
34928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
34929#[inline]
34930#[target_feature(enable = "avx512f")]
34931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34933pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
34934 unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(m:k, yes:a, no:_mm512_set1_ps(1.))) }
34935}
34936
34937/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
34938///
34939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
34940#[inline]
34941#[target_feature(enable = "avx512f")]
34942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34944pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
34945 unsafe {
34946 let a: __m256d = _mm256_mul_pd(
34947 a:_mm512_extractf64x4_pd::<0>(a),
34948 b:_mm512_extractf64x4_pd::<1>(a),
34949 );
34950 let a: __m128d = _mm_mul_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
34951 simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
34952 }
34953}
34954
34955/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
34956///
34957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
34958#[inline]
34959#[target_feature(enable = "avx512f")]
34960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34962pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
34963 unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(m:k, yes:a, no:_mm512_set1_pd(1.))) }
34964}
34965
/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    // Horizontal signed max over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_i32x16()) }
}
34976
34977/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
34978///
34979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
34980#[inline]
34981#[target_feature(enable = "avx512f")]
34982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34983#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34984pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
34985 unsafe {
34986 simd_reduce_max(simd_select_bitmask(
34987 m:k,
34988 yes:a.as_i32x16(),
34989 no:i32x16::splat(i32::MIN),
34990 ))
34991 }
34992}
34993
/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    // Horizontal signed max over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_i64x8()) }
}
35004
35005/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35006///
35007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
35008#[inline]
35009#[target_feature(enable = "avx512f")]
35010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35012pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
35013 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(i64::MIN))) }
35014}
35015
/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned max over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_u32x16()) }
}
35026
35027/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35028///
35029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
35030#[inline]
35031#[target_feature(enable = "avx512f")]
35032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35033#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35034pub const fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
35035 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_u32x16(), no:u32x16::ZERO)) }
35036}
35037
/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    // Horizontal unsigned max over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_max(a.as_u64x8()) }
}
35048
35049/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
35050///
35051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
35052#[inline]
35053#[target_feature(enable = "avx512f")]
35054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35056pub const fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
35057 unsafe { simd_reduce_max(simd_select_bitmask(m:k, yes:a.as_u64x8(), no:u64x8::ZERO)) }
35058}
35059
35060/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
35061///
35062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
35063#[inline]
35064#[target_feature(enable = "avx512f")]
35065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35066pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
35067 unsafe {
35068 let a: __m256 = _mm256_max_ps(
35069 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
35070 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
35071 );
35072 let a: __m128 = _mm_max_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
35073 let a: __m128 = _mm_max_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
35074 _mm_cvtss_f32(_mm_max_ss(a, b:_mm_movehdup_ps(a)))
35075 }
35076}
35077
35078/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
35081#[inline]
35082#[target_feature(enable = "avx512f")]
35083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35084pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
35085 _mm512_reduce_max_ps(_mm512_mask_mov_ps(src:_mm512_set1_ps(f32::MIN), k, a))
35086}
35087
35088/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
35089///
35090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
35091#[inline]
35092#[target_feature(enable = "avx512f")]
35093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35094pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
35095 unsafe {
35096 let a: __m256d = _mm256_max_pd(
35097 a:_mm512_extractf64x4_pd::<0>(a),
35098 b:_mm512_extractf64x4_pd::<1>(a),
35099 );
35100 let a: __m128d = _mm_max_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
35101 _mm_cvtsd_f64(_mm_max_sd(a, b:simd_shuffle!(a, a, [1, 0])))
35102 }
35103}
35104
35105/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
35106///
35107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
35108#[inline]
35109#[target_feature(enable = "avx512f")]
35110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35111pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
35112 _mm512_reduce_max_pd(_mm512_mask_mov_pd(src:_mm512_set1_pd(f64::MIN), k, a))
35113}
35114
/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    // Horizontal signed min over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_i32x16()) }
}
35125
35126/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35127///
35128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
35129#[inline]
35130#[target_feature(enable = "avx512f")]
35131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35133pub const fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
35134 unsafe {
35135 simd_reduce_min(simd_select_bitmask(
35136 m:k,
35137 yes:a.as_i32x16(),
35138 no:i32x16::splat(i32::MAX),
35139 ))
35140 }
35141}
35142
/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    // Horizontal signed min over all 8 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_i64x8()) }
}
35153
35154/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35155///
35156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
35157#[inline]
35158#[target_feature(enable = "avx512f")]
35159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35160#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35161pub const fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
35162 unsafe { simd_reduce_min(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(i64::MAX))) }
35163}
35164
/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    // Horizontal unsigned min over all 16 lanes via the portable SIMD reduction.
    unsafe { simd_reduce_min(a.as_u32x16()) }
}
35175
35176/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35177///
35178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
35179#[inline]
35180#[target_feature(enable = "avx512f")]
35181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35183pub const fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
35184 unsafe {
35185 simd_reduce_min(simd_select_bitmask(
35186 m:k,
35187 yes:a.as_u32x16(),
35188 no:u32x16::splat(u32::MAX),
35189 ))
35190 }
35191}
35192
35193/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
35194///
35195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
35196#[inline]
35197#[target_feature(enable = "avx512f")]
35198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35200pub const fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
35201 unsafe { simd_reduce_min(a.as_u64x8()) }
35202}
35203
35204/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the minimum of all active elements in a.
35205///
35206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
35207#[inline]
35208#[target_feature(enable = "avx512f")]
35209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35210#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35211pub const fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
35212 unsafe { simd_reduce_min(simd_select_bitmask(m:k, yes:a.as_u64x8(), no:u64x8::splat(u64::MAX))) }
35213}
35214
35215/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
35216///
35217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
35218#[inline]
35219#[target_feature(enable = "avx512f")]
35220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35221pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
35222 unsafe {
35223 let a: __m256 = _mm256_min_ps(
35224 a:simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
35225 b:simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
35226 );
35227 let a: __m128 = _mm_min_ps(a:_mm256_extractf128_ps::<0>(a), b:_mm256_extractf128_ps::<1>(a));
35228 let a: __m128 = _mm_min_ps(a, b:simd_shuffle!(a, a, [2, 3, 0, 1]));
35229 _mm_cvtss_f32(_mm_min_ss(a, b:_mm_movehdup_ps(a)))
35230 }
35231}
35232
35233/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a.
35234///
35235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
35236#[inline]
35237#[target_feature(enable = "avx512f")]
35238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35239pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
35240 _mm512_reduce_min_ps(_mm512_mask_mov_ps(src:_mm512_set1_ps(f32::MAX), k, a))
35241}
35242
35243/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
35244///
35245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
35246#[inline]
35247#[target_feature(enable = "avx512f")]
35248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35249pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
35250 unsafe {
35251 let a: __m256d = _mm256_min_pd(
35252 a:_mm512_extractf64x4_pd::<0>(a),
35253 b:_mm512_extractf64x4_pd::<1>(a),
35254 );
35255 let a: __m128d = _mm_min_pd(a:_mm256_extractf128_pd::<0>(a), b:_mm256_extractf128_pd::<1>(a));
35256 _mm_cvtsd_f64(_mm_min_sd(a, b:simd_shuffle!(a, a, [1, 0])))
35257 }
35258}
35259
35260/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the minimum of all active elements in a.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
35263#[inline]
35264#[target_feature(enable = "avx512f")]
35265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35266pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
35267 _mm512_reduce_min_pd(_mm512_mask_mov_pd(src:_mm512_set1_pd(f64::MAX), k, a))
35268}
35269
35270/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
35271///
35272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
35273#[inline]
35274#[target_feature(enable = "avx512f")]
35275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35277pub const fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
35278 unsafe { simd_reduce_and(a.as_i32x16()) }
35279}
35280
35281/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
35282///
35283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
35284#[inline]
35285#[target_feature(enable = "avx512f")]
35286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35288pub const fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
35289 unsafe { simd_reduce_and(simd_select_bitmask(m:k, yes:a.as_i32x16(), no:i32x16::splat(-1))) }
35290}
35291
35292/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
35293///
35294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
35295#[inline]
35296#[target_feature(enable = "avx512f")]
35297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35299pub const fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
35300 unsafe { simd_reduce_and(a.as_i64x8()) }
35301}
35302
35303/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
35304///
35305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
35306#[inline]
35307#[target_feature(enable = "avx512f")]
35308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35310pub const fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
35311 unsafe { simd_reduce_and(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::splat(-1))) }
35312}
35313
35314/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
35315///
35316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
35317#[inline]
35318#[target_feature(enable = "avx512f")]
35319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35321pub const fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
35322 unsafe { simd_reduce_or(a.as_i32x16()) }
35323}
35324
35325/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
35328#[inline]
35329#[target_feature(enable = "avx512f")]
35330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35332pub const fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
35333 unsafe { simd_reduce_or(simd_select_bitmask(m:k, yes:a.as_i32x16(), no:i32x16::ZERO)) }
35334}
35335
35336/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
35337///
35338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
35339#[inline]
35340#[target_feature(enable = "avx512f")]
35341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35343pub const fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
35344 unsafe { simd_reduce_or(a.as_i64x8()) }
35345}
35346
35347/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
35348///
35349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
35350#[inline]
35351#[target_feature(enable = "avx512f")]
35352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35353#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35354pub const fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
35355 unsafe { simd_reduce_or(simd_select_bitmask(m:k, yes:a.as_i64x8(), no:i64x8::ZERO)) }
35356}
35357
35358/// Returns vector of type `__m512d` with indeterminate elements.
35359/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35360/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35361/// In practice, this is typically equivalent to [`mem::zeroed`].
35362///
35363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
35364#[inline]
35365#[target_feature(enable = "avx512f")]
35366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35367// This intrinsic has no corresponding instruction.
35368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35369pub const fn _mm512_undefined_pd() -> __m512d {
35370 unsafe { const { mem::zeroed() } }
35371}
35372
35373/// Returns vector of type `__m512` with indeterminate elements.
35374/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35375/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35376/// In practice, this is typically equivalent to [`mem::zeroed`].
35377///
35378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
35379#[inline]
35380#[target_feature(enable = "avx512f")]
35381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35382// This intrinsic has no corresponding instruction.
35383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35384pub const fn _mm512_undefined_ps() -> __m512 {
35385 unsafe { const { mem::zeroed() } }
35386}
35387
35388/// Return vector of type __m512i with indeterminate elements.
35389/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35390/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35391/// In practice, this is typically equivalent to [`mem::zeroed`].
35392///
35393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
35394#[inline]
35395#[target_feature(enable = "avx512f")]
35396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35397// This intrinsic has no corresponding instruction.
35398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35399pub const fn _mm512_undefined_epi32() -> __m512i {
35400 unsafe { const { mem::zeroed() } }
35401}
35402
35403/// Return vector of type __m512 with indeterminate elements.
35404/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
35405/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
35406/// In practice, this is typically equivalent to [`mem::zeroed`].
35407///
35408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
35409#[inline]
35410#[target_feature(enable = "avx512f")]
35411#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35412// This intrinsic has no corresponding instruction.
35413#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35414pub const fn _mm512_undefined() -> __m512 {
35415 unsafe { const { mem::zeroed() } }
35416}
35417
35418/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35419///
35420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
35421#[inline]
35422#[target_feature(enable = "avx512f")]
35423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35426pub const unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
35427 ptr::read_unaligned(src:mem_addr as *const __m512i)
35428}
35429
35430/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35431///
35432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
35433#[inline]
35434#[target_feature(enable = "avx512f,avx512vl")]
35435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35436#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35438pub const unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
35439 ptr::read_unaligned(src:mem_addr as *const __m256i)
35440}
35441
35442/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35443///
35444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
35445#[inline]
35446#[target_feature(enable = "avx512f,avx512vl")]
35447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35448#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35450pub const unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
35451 ptr::read_unaligned(src:mem_addr as *const __m128i)
35452}
35453
35454/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35455///
35456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
35457#[inline]
35458#[target_feature(enable = "avx512f")]
35459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35460#[cfg_attr(test, assert_instr(vpmovdw))]
35461pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35462 vpmovdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35463}
35464
35465/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35466///
35467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
35468#[inline]
35469#[target_feature(enable = "avx512f,avx512vl")]
35470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35471#[cfg_attr(test, assert_instr(vpmovdw))]
35472pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35473 vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35474}
35475
35476/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35477///
35478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
35479#[inline]
35480#[target_feature(enable = "avx512f,avx512vl")]
35481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35482#[cfg_attr(test, assert_instr(vpmovdw))]
35483pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35484 vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35485}
35486
35487/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35488///
35489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
35490#[inline]
35491#[target_feature(enable = "avx512f")]
35492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35493#[cfg_attr(test, assert_instr(vpmovsdw))]
35494pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35495 vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35496}
35497
35498/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35499///
35500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
35501#[inline]
35502#[target_feature(enable = "avx512f,avx512vl")]
35503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35504#[cfg_attr(test, assert_instr(vpmovsdw))]
35505pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35506 vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35507}
35508
35509/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35510///
35511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
35512#[inline]
35513#[target_feature(enable = "avx512f,avx512vl")]
35514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35515#[cfg_attr(test, assert_instr(vpmovsdw))]
35516pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35517 vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35518}
35519
35520/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35521///
35522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
35523#[inline]
35524#[target_feature(enable = "avx512f")]
35525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35526#[cfg_attr(test, assert_instr(vpmovusdw))]
35527pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35528 vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), mask:k);
35529}
35530
35531/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35532///
35533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
35534#[inline]
35535#[target_feature(enable = "avx512f,avx512vl")]
35536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35537#[cfg_attr(test, assert_instr(vpmovusdw))]
35538pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35539 vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), mask:k);
35540}
35541
35542/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35543///
35544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
35545#[inline]
35546#[target_feature(enable = "avx512f,avx512vl")]
35547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35548#[cfg_attr(test, assert_instr(vpmovusdw))]
35549pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35550 vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), mask:k);
35551}
35552
35553/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35554///
35555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
35556#[inline]
35557#[target_feature(enable = "avx512f")]
35558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35559#[cfg_attr(test, assert_instr(vpmovdb))]
35560pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35561 vpmovdbmem(mem_addr, a.as_i32x16(), mask:k);
35562}
35563
35564/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35565///
35566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
35567#[inline]
35568#[target_feature(enable = "avx512f,avx512vl")]
35569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35570#[cfg_attr(test, assert_instr(vpmovdb))]
35571pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35572 vpmovdbmem256(mem_addr, a.as_i32x8(), mask:k);
35573}
35574
35575/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35576///
35577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
35578#[inline]
35579#[target_feature(enable = "avx512f,avx512vl")]
35580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35581#[cfg_attr(test, assert_instr(vpmovdb))]
35582pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35583 vpmovdbmem128(mem_addr, a.as_i32x4(), mask:k);
35584}
35585
35586/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
35589#[inline]
35590#[target_feature(enable = "avx512f")]
35591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35592#[cfg_attr(test, assert_instr(vpmovsdb))]
35593pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35594 vpmovsdbmem(mem_addr, a.as_i32x16(), mask:k);
35595}
35596
35597/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35598///
35599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
35600#[inline]
35601#[target_feature(enable = "avx512f,avx512vl")]
35602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35603#[cfg_attr(test, assert_instr(vpmovsdb))]
35604pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35605 vpmovsdbmem256(mem_addr, a.as_i32x8(), mask:k);
35606}
35607
35608/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35609///
35610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
35611#[inline]
35612#[target_feature(enable = "avx512f,avx512vl")]
35613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35614#[cfg_attr(test, assert_instr(vpmovsdb))]
35615pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35616 vpmovsdbmem128(mem_addr, a.as_i32x4(), mask:k);
35617}
35618
35619/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35620///
35621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
35622#[inline]
35623#[target_feature(enable = "avx512f")]
35624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35625#[cfg_attr(test, assert_instr(vpmovusdb))]
35626pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
35627 vpmovusdbmem(mem_addr, a.as_i32x16(), mask:k);
35628}
35629
35630/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35631///
35632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
35633#[inline]
35634#[target_feature(enable = "avx512f,avx512vl")]
35635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35636#[cfg_attr(test, assert_instr(vpmovusdb))]
35637pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35638 vpmovusdbmem256(mem_addr, a.as_i32x8(), mask:k);
35639}
35640
35641/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35642///
35643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
35644#[inline]
35645#[target_feature(enable = "avx512f,avx512vl")]
35646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35647#[cfg_attr(test, assert_instr(vpmovusdb))]
35648pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35649 vpmovusdbmem128(mem_addr, a.as_i32x4(), mask:k);
35650}
35651
35652/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35653///
35654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
35655#[inline]
35656#[target_feature(enable = "avx512f")]
35657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35658#[cfg_attr(test, assert_instr(vpmovqw))]
35659pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35660 vpmovqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35661}
35662
35663/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35664///
35665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
35666#[inline]
35667#[target_feature(enable = "avx512f,avx512vl")]
35668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35669#[cfg_attr(test, assert_instr(vpmovqw))]
35670pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35671 vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35672}
35673
35674/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35675///
35676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
35677#[inline]
35678#[target_feature(enable = "avx512f,avx512vl")]
35679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35680#[cfg_attr(test, assert_instr(vpmovqw))]
35681pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35682 vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35683}
35684
35685/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35686///
35687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
35688#[inline]
35689#[target_feature(enable = "avx512f")]
35690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35691#[cfg_attr(test, assert_instr(vpmovsqw))]
35692pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35693 vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35694}
35695
35696/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35697///
35698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
35699#[inline]
35700#[target_feature(enable = "avx512f,avx512vl")]
35701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35702#[cfg_attr(test, assert_instr(vpmovsqw))]
35703pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35704 vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35705}
35706
35707/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35708///
35709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
35710#[inline]
35711#[target_feature(enable = "avx512f,avx512vl")]
35712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35713#[cfg_attr(test, assert_instr(vpmovsqw))]
35714pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35715 vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35716}
35717
35718/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
35721#[inline]
35722#[target_feature(enable = "avx512f")]
35723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35724#[cfg_attr(test, assert_instr(vpmovusqw))]
35725pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
35726 vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35727}
35728
35729/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35730///
35731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
35732#[inline]
35733#[target_feature(enable = "avx512f,avx512vl")]
35734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35735#[cfg_attr(test, assert_instr(vpmovusqw))]
35736pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35737 vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35738}
35739
35740/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35741///
35742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
35743#[inline]
35744#[target_feature(enable = "avx512f,avx512vl")]
35745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35746#[cfg_attr(test, assert_instr(vpmovusqw))]
35747pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35748 vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35749}
35750
35751/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35752///
35753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
35754#[inline]
35755#[target_feature(enable = "avx512f")]
35756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35757#[cfg_attr(test, assert_instr(vpmovqb))]
35758pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35759 vpmovqbmem(mem_addr, a.as_i64x8(), mask:k);
35760}
35761
35762/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35763///
35764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
35765#[inline]
35766#[target_feature(enable = "avx512f,avx512vl")]
35767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35768#[cfg_attr(test, assert_instr(vpmovqb))]
35769pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35770 vpmovqbmem256(mem_addr, a.as_i64x4(), mask:k);
35771}
35772
35773/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35774///
35775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
35776#[inline]
35777#[target_feature(enable = "avx512f,avx512vl")]
35778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35779#[cfg_attr(test, assert_instr(vpmovqb))]
35780pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35781 vpmovqbmem128(mem_addr, a.as_i64x2(), mask:k);
35782}
35783
35784/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35785///
35786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
35787#[inline]
35788#[target_feature(enable = "avx512f")]
35789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35790#[cfg_attr(test, assert_instr(vpmovsqb))]
35791pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35792 vpmovsqbmem(mem_addr, a.as_i64x8(), mask:k);
35793}
35794
35795/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35796///
35797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
35798#[inline]
35799#[target_feature(enable = "avx512f,avx512vl")]
35800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35801#[cfg_attr(test, assert_instr(vpmovsqb))]
35802pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35803 vpmovsqbmem256(mem_addr, a.as_i64x4(), mask:k);
35804}
35805
35806/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35807///
35808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
35809#[inline]
35810#[target_feature(enable = "avx512f,avx512vl")]
35811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35812#[cfg_attr(test, assert_instr(vpmovsqb))]
35813pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35814 vpmovsqbmem128(mem_addr, a.as_i64x2(), mask:k);
35815}
35816
35817/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35818///
35819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
35820#[inline]
35821#[target_feature(enable = "avx512f")]
35822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35823#[cfg_attr(test, assert_instr(vpmovusqb))]
35824pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
35825 vpmovusqbmem(mem_addr, a.as_i64x8(), mask:k);
35826}
35827
35828/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
35831#[inline]
35832#[target_feature(enable = "avx512f,avx512vl")]
35833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35834#[cfg_attr(test, assert_instr(vpmovusqb))]
35835pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
35836 vpmovusqbmem256(mem_addr, a.as_i64x4(), mask:k);
35837}
35838
35839/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35845#[cfg_attr(test, assert_instr(vpmovusqb))]
35846pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
35847 vpmovusqbmem128(mem_addr, a.as_i64x2(), mask:k);
35848}
35849
35850///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35851///
35852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
35853#[inline]
35854#[target_feature(enable = "avx512f")]
35855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35856#[cfg_attr(test, assert_instr(vpmovqd))]
35857pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35858 vpmovqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35859}
35860
35861///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35862///
35863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
35864#[inline]
35865#[target_feature(enable = "avx512f,avx512vl")]
35866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35867#[cfg_attr(test, assert_instr(vpmovqd))]
35868pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35869 vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35870}
35871
35872///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35873///
35874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
35875#[inline]
35876#[target_feature(enable = "avx512f,avx512vl")]
35877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35878#[cfg_attr(test, assert_instr(vpmovqd))]
35879pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35880 vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35881}
35882
35883/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35884///
35885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
35886#[inline]
35887#[target_feature(enable = "avx512f")]
35888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35889#[cfg_attr(test, assert_instr(vpmovsqd))]
35890pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35891 vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35892}
35893
35894/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35895///
35896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
35897#[inline]
35898#[target_feature(enable = "avx512f,avx512vl")]
35899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35900#[cfg_attr(test, assert_instr(vpmovsqd))]
35901pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35902 vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35903}
35904
35905/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35906///
35907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
35908#[inline]
35909#[target_feature(enable = "avx512f,avx512vl")]
35910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35911#[cfg_attr(test, assert_instr(vpmovsqd))]
35912pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35913 vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35914}
35915
35916/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35917///
35918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
35919#[inline]
35920#[target_feature(enable = "avx512f")]
35921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35922#[cfg_attr(test, assert_instr(vpmovusqd))]
35923pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
35924 vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), mask:k);
35925}
35926
35927/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35928///
35929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
35930#[inline]
35931#[target_feature(enable = "avx512f,avx512vl")]
35932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35933#[cfg_attr(test, assert_instr(vpmovusqd))]
35934pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
35935 vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
35936}
35937
35938/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35939///
35940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
35941#[inline]
35942#[target_feature(enable = "avx512f,avx512vl")]
35943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35944#[cfg_attr(test, assert_instr(vpmovusqd))]
35945pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
35946 vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
35947}
35948
35949/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35950///
35951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
35952#[inline]
35953#[target_feature(enable = "avx512f")]
35954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35955#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35956#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35957pub const unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
35958 ptr::write_unaligned(dst:mem_addr as *mut __m512i, src:a);
35959}
35960
35961/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
35964#[inline]
35965#[target_feature(enable = "avx512f,avx512vl")]
35966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35967#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35968#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35969pub const unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
35970 ptr::write_unaligned(dst:mem_addr as *mut __m256i, src:a);
35971}
35972
35973/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
35974///
35975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
35976#[inline]
35977#[target_feature(enable = "avx512f,avx512vl")]
35978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35979#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
35980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35981pub const unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
35982 ptr::write_unaligned(dst:mem_addr as *mut __m128i, src:a);
35983}
35984
35985/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35986///
35987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
35988#[inline]
35989#[target_feature(enable = "avx512f")]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
35992#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35993pub const unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
35994 ptr::read_unaligned(src:mem_addr as *const __m512i)
35995}
35996
35997/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
35998///
35999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
36000#[inline]
36001#[target_feature(enable = "avx512f,avx512vl")]
36002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36003#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36004#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36005pub const unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
36006 ptr::read_unaligned(src:mem_addr as *const __m256i)
36007}
36008
36009/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36010///
36011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
36012#[inline]
36013#[target_feature(enable = "avx512f,avx512vl")]
36014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36015#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36017pub const unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
36018 ptr::read_unaligned(src:mem_addr as *const __m128i)
36019}
36020
36021/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36022///
36023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
36024#[inline]
36025#[target_feature(enable = "avx512f")]
36026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36027#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36029pub const unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
36030 ptr::write_unaligned(dst:mem_addr as *mut __m512i, src:a);
36031}
36032
36033/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36034///
36035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
36036#[inline]
36037#[target_feature(enable = "avx512f,avx512vl")]
36038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36039#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36040#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36041pub const unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
36042 ptr::write_unaligned(dst:mem_addr as *mut __m256i, src:a);
36043}
36044
36045/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
36046///
36047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
36048#[inline]
36049#[target_feature(enable = "avx512f,avx512vl")]
36050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36051#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
36052#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36053pub const unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
36054 ptr::write_unaligned(dst:mem_addr as *mut __m128i, src:a);
36055}
36056
36057/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
36058///
36059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36063#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36065pub const unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
36066 ptr::read_unaligned(src:mem_addr)
36067}
36068
36069/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
36070///
36071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
36072#[inline]
36073#[target_feature(enable = "avx512f")]
36074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36075#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
36076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36077pub const unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
36078 ptr::write_unaligned(dst:mem_addr, src:a);
36079}
36080
36081/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
36082/// floating-point elements) from memory into result.
36083/// `mem_addr` does not need to be aligned on any particular boundary.
36084///
36085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
36086#[inline]
36087#[target_feature(enable = "avx512f")]
36088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36089#[cfg_attr(test, assert_instr(vmovups))]
36090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36091pub const unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
36092 ptr::read_unaligned(src:mem_addr as *const __m512d)
36093}
36094
36095/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
36096/// floating-point elements) from `a` into memory.
36097/// `mem_addr` does not need to be aligned on any particular boundary.
36098///
36099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
36100#[inline]
36101#[target_feature(enable = "avx512f")]
36102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36103#[cfg_attr(test, assert_instr(vmovups))]
36104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36105pub const unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
36106 ptr::write_unaligned(dst:mem_addr as *mut __m512d, src:a);
36107}
36108
36109/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
36110/// floating-point elements) from memory into result.
36111/// `mem_addr` does not need to be aligned on any particular boundary.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36117#[cfg_attr(test, assert_instr(vmovups))]
36118#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36119pub const unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
36120 ptr::read_unaligned(src:mem_addr as *const __m512)
36121}
36122
36123/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
36124/// floating-point elements) from `a` into memory.
36125/// `mem_addr` does not need to be aligned on any particular boundary.
36126///
36127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
36128#[inline]
36129#[target_feature(enable = "avx512f")]
36130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36131#[cfg_attr(test, assert_instr(vmovups))]
36132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36133pub const unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
36134 ptr::write_unaligned(dst:mem_addr as *mut __m512, src:a);
36135}
36136
36137/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36138///
36139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
36140#[inline]
36141#[target_feature(enable = "avx512f")]
36142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36143#[cfg_attr(
36144 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36145 assert_instr(vmovaps)
36146)] //should be vmovdqa32
36147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36148pub const unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
36149 ptr::read(src:mem_addr)
36150}
36151
36152/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36153///
36154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
36155#[inline]
36156#[target_feature(enable = "avx512f")]
36157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36158#[cfg_attr(
36159 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36160 assert_instr(vmovaps)
36161)] //should be vmovdqa32
36162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36163pub const unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
36164 ptr::write(dst:mem_addr, src:a);
36165}
36166
36167/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36168///
36169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
36170#[inline]
36171#[target_feature(enable = "avx512f")]
36172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36173#[cfg_attr(
36174 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36175 assert_instr(vmovaps)
36176)] //should be vmovdqa32
36177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36178pub const unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
36179 ptr::read(src:mem_addr as *const __m512i)
36180}
36181
36182/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36183///
36184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
36185#[inline]
36186#[target_feature(enable = "avx512f,avx512vl")]
36187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36188#[cfg_attr(
36189 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36190 assert_instr(vmovaps)
36191)] //should be vmovdqa32
36192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36193pub const unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
36194 ptr::read(src:mem_addr as *const __m256i)
36195}
36196
36197/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36198///
36199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
36200#[inline]
36201#[target_feature(enable = "avx512f,avx512vl")]
36202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36203#[cfg_attr(
36204 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36205 assert_instr(vmovaps)
36206)] //should be vmovdqa32
36207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36208pub const unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
36209 ptr::read(src:mem_addr as *const __m128i)
36210}
36211
36212/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36213///
36214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
36215#[inline]
36216#[target_feature(enable = "avx512f")]
36217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36218#[cfg_attr(
36219 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36220 assert_instr(vmovaps)
36221)] //should be vmovdqa32
36222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36223pub const unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
36224 ptr::write(dst:mem_addr as *mut __m512i, src:a);
36225}
36226
36227/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36228///
36229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
36230#[inline]
36231#[target_feature(enable = "avx512f,avx512vl")]
36232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36233#[cfg_attr(
36234 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36235 assert_instr(vmovaps)
36236)] //should be vmovdqa32
36237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36238pub const unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
36239 ptr::write(dst:mem_addr as *mut __m256i, src:a);
36240}
36241
36242/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36243///
36244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
36245#[inline]
36246#[target_feature(enable = "avx512f,avx512vl")]
36247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36248#[cfg_attr(
36249 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36250 assert_instr(vmovaps)
36251)] //should be vmovdqa32
36252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36253pub const unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
36254 ptr::write(dst:mem_addr as *mut __m128i, src:a);
36255}
36256
36257/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36258///
36259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
36260#[inline]
36261#[target_feature(enable = "avx512f")]
36262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36263#[cfg_attr(
36264 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36265 assert_instr(vmovaps)
36266)] //should be vmovdqa64
36267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36268pub const unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
36269 ptr::read(src:mem_addr as *const __m512i)
36270}
36271
36272/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36273///
36274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
36275#[inline]
36276#[target_feature(enable = "avx512f,avx512vl")]
36277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36278#[cfg_attr(
36279 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36280 assert_instr(vmovaps)
36281)] //should be vmovdqa64
36282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36283pub const unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
36284 ptr::read(src:mem_addr as *const __m256i)
36285}
36286
36287/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36288///
36289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
36290#[inline]
36291#[target_feature(enable = "avx512f,avx512vl")]
36292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36293#[cfg_attr(
36294 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36295 assert_instr(vmovaps)
36296)] //should be vmovdqa64
36297#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36298pub const unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
36299 ptr::read(src:mem_addr as *const __m128i)
36300}
36301
36302/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(
36309 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36310 assert_instr(vmovaps)
36311)] //should be vmovdqa64
36312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36313pub const unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
36314 ptr::write(dst:mem_addr as *mut __m512i, src:a);
36315}
36316
36317/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36318///
36319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
36320#[inline]
36321#[target_feature(enable = "avx512f,avx512vl")]
36322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36323#[cfg_attr(
36324 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36325 assert_instr(vmovaps)
36326)] //should be vmovdqa64
36327#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36328pub const unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
36329 ptr::write(dst:mem_addr as *mut __m256i, src:a);
36330}
36331
36332/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
36333///
36334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
36335#[inline]
36336#[target_feature(enable = "avx512f,avx512vl")]
36337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36338#[cfg_attr(
36339 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36340 assert_instr(vmovaps)
36341)] //should be vmovdqa64
36342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36343pub const unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
36344 ptr::write(dst:mem_addr as *mut __m128i, src:a);
36345}
36346
36347/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36348///
36349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
36350#[inline]
36351#[target_feature(enable = "avx512f")]
36352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36353#[cfg_attr(
36354 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36355 assert_instr(vmovaps)
36356)]
36357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36358pub const unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
36359 ptr::read(src:mem_addr as *const __m512)
36360}
36361
36362/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36363///
36364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
36365#[inline]
36366#[target_feature(enable = "avx512f")]
36367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36368#[cfg_attr(
36369 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36370 assert_instr(vmovaps)
36371)]
36372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36373pub const unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
36374 ptr::write(dst:mem_addr as *mut __m512, src:a);
36375}
36376
36377/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36378///
36379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
36380#[inline]
36381#[target_feature(enable = "avx512f")]
36382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36383#[cfg_attr(
36384 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36385 assert_instr(vmovaps)
36386)] //should be vmovapd
36387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36388pub const unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
36389 ptr::read(src:mem_addr as *const __m512d)
36390}
36391
36392/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36393///
36394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
36395#[inline]
36396#[target_feature(enable = "avx512f")]
36397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36398#[cfg_attr(
36399 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
36400 assert_instr(vmovaps)
36401)] //should be vmovapd
36402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36403pub const unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
36404 ptr::write(dst:mem_addr as *mut __m512d, src:a);
36405}
36406
36407/// Load packed 32-bit integers from memory into dst using writemask k
36408/// (elements are copied from src when the corresponding mask bit is not set).
36409/// mem_addr does not need to be aligned on any particular boundary.
36410///
36411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
36412#[inline]
36413#[target_feature(enable = "avx512f")]
36414#[cfg_attr(test, assert_instr(vmovdqu32))]
36415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36417pub const unsafe fn _mm512_mask_loadu_epi32(
36418 src: __m512i,
36419 k: __mmask16,
36420 mem_addr: *const i32,
36421) -> __m512i {
36422 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36423 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
36424}
36425
36426/// Load packed 32-bit integers from memory into dst using zeromask k
36427/// (elements are zeroed out when the corresponding mask bit is not set).
36428/// mem_addr does not need to be aligned on any particular boundary.
36429///
36430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
36431#[inline]
36432#[target_feature(enable = "avx512f")]
36433#[cfg_attr(test, assert_instr(vmovdqu32))]
36434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36436pub const unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
36437 _mm512_mask_loadu_epi32(src:_mm512_setzero_si512(), k, mem_addr)
36438}
36439
36440/// Load packed 64-bit integers from memory into dst using writemask k
36441/// (elements are copied from src when the corresponding mask bit is not set).
36442/// mem_addr does not need to be aligned on any particular boundary.
36443///
36444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
36445#[inline]
36446#[target_feature(enable = "avx512f")]
36447#[cfg_attr(test, assert_instr(vmovdqu64))]
36448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36450pub const unsafe fn _mm512_mask_loadu_epi64(
36451 src: __m512i,
36452 k: __mmask8,
36453 mem_addr: *const i64,
36454) -> __m512i {
36455 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36456 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
36457}
36458
36459/// Load packed 64-bit integers from memory into dst using zeromask k
36460/// (elements are zeroed out when the corresponding mask bit is not set).
36461/// mem_addr does not need to be aligned on any particular boundary.
36462///
36463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
36464#[inline]
36465#[target_feature(enable = "avx512f")]
36466#[cfg_attr(test, assert_instr(vmovdqu64))]
36467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36469pub const unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
36470 _mm512_mask_loadu_epi64(src:_mm512_setzero_si512(), k, mem_addr)
36471}
36472
36473/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36474/// (elements are copied from src when the corresponding mask bit is not set).
36475/// mem_addr does not need to be aligned on any particular boundary.
36476///
36477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
36478#[inline]
36479#[target_feature(enable = "avx512f")]
36480#[cfg_attr(test, assert_instr(vmovups))]
36481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36483pub const unsafe fn _mm512_mask_loadu_ps(
36484 src: __m512,
36485 k: __mmask16,
36486 mem_addr: *const f32,
36487) -> __m512 {
36488 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36489 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
36490}
36491
36492/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36493/// (elements are zeroed out when the corresponding mask bit is not set).
36494/// mem_addr does not need to be aligned on any particular boundary.
36495///
36496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
36497#[inline]
36498#[target_feature(enable = "avx512f")]
36499#[cfg_attr(test, assert_instr(vmovups))]
36500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36502pub const unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
36503 _mm512_mask_loadu_ps(src:_mm512_setzero_ps(), k, mem_addr)
36504}
36505
36506/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36507/// (elements are copied from src when the corresponding mask bit is not set).
36508/// mem_addr does not need to be aligned on any particular boundary.
36509///
36510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
36511#[inline]
36512#[target_feature(enable = "avx512f")]
36513#[cfg_attr(test, assert_instr(vmovupd))]
36514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36515#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36516pub const unsafe fn _mm512_mask_loadu_pd(
36517 src: __m512d,
36518 k: __mmask8,
36519 mem_addr: *const f64,
36520) -> __m512d {
36521 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36522 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
36523}
36524
36525/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36526/// (elements are zeroed out when the corresponding mask bit is not set).
36527/// mem_addr does not need to be aligned on any particular boundary.
36528///
36529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
36530#[inline]
36531#[target_feature(enable = "avx512f")]
36532#[cfg_attr(test, assert_instr(vmovupd))]
36533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36535pub const unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36536 _mm512_mask_loadu_pd(src:_mm512_setzero_pd(), k, mem_addr)
36537}
36538
36539/// Load packed 32-bit integers from memory into dst using writemask k
36540/// (elements are copied from src when the corresponding mask bit is not set).
36541/// mem_addr does not need to be aligned on any particular boundary.
36542///
36543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
36544#[inline]
36545#[target_feature(enable = "avx512f,avx512vl")]
36546#[cfg_attr(test, assert_instr(vmovdqu32))]
36547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36549pub const unsafe fn _mm256_mask_loadu_epi32(
36550 src: __m256i,
36551 k: __mmask8,
36552 mem_addr: *const i32,
36553) -> __m256i {
36554 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36555 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
36556}
36557
36558/// Load packed 32-bit integers from memory into dst using zeromask k
36559/// (elements are zeroed out when the corresponding mask bit is not set).
36560/// mem_addr does not need to be aligned on any particular boundary.
36561///
36562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
36563#[inline]
36564#[target_feature(enable = "avx512f,avx512vl")]
36565#[cfg_attr(test, assert_instr(vmovdqu32))]
36566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36568pub const unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
36569 _mm256_mask_loadu_epi32(src:_mm256_setzero_si256(), k, mem_addr)
36570}
36571
36572/// Load packed 64-bit integers from memory into dst using writemask k
36573/// (elements are copied from src when the corresponding mask bit is not set).
36574/// mem_addr does not need to be aligned on any particular boundary.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
36577#[inline]
36578#[target_feature(enable = "avx512f,avx512vl")]
36579#[cfg_attr(test, assert_instr(vmovdqu64))]
36580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36582pub const unsafe fn _mm256_mask_loadu_epi64(
36583 src: __m256i,
36584 k: __mmask8,
36585 mem_addr: *const i64,
36586) -> __m256i {
36587 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36588 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
36589}
36590
36591/// Load packed 64-bit integers from memory into dst using zeromask k
36592/// (elements are zeroed out when the corresponding mask bit is not set).
36593/// mem_addr does not need to be aligned on any particular boundary.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
36596#[inline]
36597#[target_feature(enable = "avx512f,avx512vl")]
36598#[cfg_attr(test, assert_instr(vmovdqu64))]
36599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36601pub const unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
36602 _mm256_mask_loadu_epi64(src:_mm256_setzero_si256(), k, mem_addr)
36603}
36604
36605/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36606/// (elements are copied from src when the corresponding mask bit is not set).
36607/// mem_addr does not need to be aligned on any particular boundary.
36608///
36609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
36610#[inline]
36611#[target_feature(enable = "avx512f,avx512vl")]
36612#[cfg_attr(test, assert_instr(vmovups))]
36613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36615pub const unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36616 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36617 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
36618}
36619
36620/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36621/// (elements are zeroed out when the corresponding mask bit is not set).
36622/// mem_addr does not need to be aligned on any particular boundary.
36623///
36624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
36625#[inline]
36626#[target_feature(enable = "avx512f,avx512vl")]
36627#[cfg_attr(test, assert_instr(vmovups))]
36628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36630pub const unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
36631 _mm256_mask_loadu_ps(src:_mm256_setzero_ps(), k, mem_addr)
36632}
36633
36634/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36635/// (elements are copied from src when the corresponding mask bit is not set).
36636/// mem_addr does not need to be aligned on any particular boundary.
36637///
36638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
36639#[inline]
36640#[target_feature(enable = "avx512f,avx512vl")]
36641#[cfg_attr(test, assert_instr(vmovupd))]
36642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36643#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36644pub const unsafe fn _mm256_mask_loadu_pd(
36645 src: __m256d,
36646 k: __mmask8,
36647 mem_addr: *const f64,
36648) -> __m256d {
36649 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36650 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
36651}
36652
36653/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36654/// (elements are zeroed out when the corresponding mask bit is not set).
36655/// mem_addr does not need to be aligned on any particular boundary.
36656///
36657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
36658#[inline]
36659#[target_feature(enable = "avx512f,avx512vl")]
36660#[cfg_attr(test, assert_instr(vmovupd))]
36661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36663pub const unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36664 _mm256_mask_loadu_pd(src:_mm256_setzero_pd(), k, mem_addr)
36665}
36666
36667/// Load packed 32-bit integers from memory into dst using writemask k
36668/// (elements are copied from src when the corresponding mask bit is not set).
36669/// mem_addr does not need to be aligned on any particular boundary.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
36672#[inline]
36673#[target_feature(enable = "avx512f,avx512vl")]
36674#[cfg_attr(test, assert_instr(vmovdqu32))]
36675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36676#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36677pub const unsafe fn _mm_mask_loadu_epi32(
36678 src: __m128i,
36679 k: __mmask8,
36680 mem_addr: *const i32,
36681) -> __m128i {
36682 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
36683 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
36684}
36685
36686/// Load packed 32-bit integers from memory into dst using zeromask k
36687/// (elements are zeroed out when the corresponding mask bit is not set).
36688/// mem_addr does not need to be aligned on any particular boundary.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
36691#[inline]
36692#[target_feature(enable = "avx512f,avx512vl")]
36693#[cfg_attr(test, assert_instr(vmovdqu32))]
36694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36696pub const unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
36697 _mm_mask_loadu_epi32(src:_mm_setzero_si128(), k, mem_addr)
36698}
36699
36700/// Load packed 64-bit integers from memory into dst using writemask k
36701/// (elements are copied from src when the corresponding mask bit is not set).
36702/// mem_addr does not need to be aligned on any particular boundary.
36703///
36704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
36705#[inline]
36706#[target_feature(enable = "avx512f,avx512vl")]
36707#[cfg_attr(test, assert_instr(vmovdqu64))]
36708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36710pub const unsafe fn _mm_mask_loadu_epi64(
36711 src: __m128i,
36712 k: __mmask8,
36713 mem_addr: *const i64,
36714) -> __m128i {
36715 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
36716 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
36717}
36718
36719/// Load packed 64-bit integers from memory into dst using zeromask k
36720/// (elements are zeroed out when the corresponding mask bit is not set).
36721/// mem_addr does not need to be aligned on any particular boundary.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
36724#[inline]
36725#[target_feature(enable = "avx512f,avx512vl")]
36726#[cfg_attr(test, assert_instr(vmovdqu64))]
36727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36729pub const unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
36730 _mm_mask_loadu_epi64(src:_mm_setzero_si128(), k, mem_addr)
36731}
36732
36733/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36734/// (elements are copied from src when the corresponding mask bit is not set).
36735/// mem_addr does not need to be aligned on any particular boundary.
36736///
36737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
36738#[inline]
36739#[target_feature(enable = "avx512f,avx512vl")]
36740#[cfg_attr(test, assert_instr(vmovups))]
36741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36743pub const unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
36744 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
36745 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
36746}
36747
36748/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36749/// (elements are zeroed out when the corresponding mask bit is not set).
36750/// mem_addr does not need to be aligned on any particular boundary.
36751///
36752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
36753#[inline]
36754#[target_feature(enable = "avx512f,avx512vl")]
36755#[cfg_attr(test, assert_instr(vmovups))]
36756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36758pub const unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
36759 _mm_mask_loadu_ps(src:_mm_setzero_ps(), k, mem_addr)
36760}
36761
36762/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36763/// (elements are copied from src when the corresponding mask bit is not set).
36764/// mem_addr does not need to be aligned on any particular boundary.
36765///
36766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
36767#[inline]
36768#[target_feature(enable = "avx512f,avx512vl")]
36769#[cfg_attr(test, assert_instr(vmovupd))]
36770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36772pub const unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36773 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
36774 simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
36775}
36776
36777/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36778/// (elements are zeroed out when the corresponding mask bit is not set).
36779/// mem_addr does not need to be aligned on any particular boundary.
36780///
36781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
36782#[inline]
36783#[target_feature(enable = "avx512f,avx512vl")]
36784#[cfg_attr(test, assert_instr(vmovupd))]
36785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36787pub const unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36788 _mm_mask_loadu_pd(src:_mm_setzero_pd(), k, mem_addr)
36789}
36790
36791/// Load packed 32-bit integers from memory into dst using writemask k
36792/// (elements are copied from src when the corresponding mask bit is not set).
36793/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36794///
36795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
36796#[inline]
36797#[target_feature(enable = "avx512f")]
36798#[cfg_attr(test, assert_instr(vmovdqa32))]
36799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36801pub const unsafe fn _mm512_mask_load_epi32(
36802 src: __m512i,
36803 k: __mmask16,
36804 mem_addr: *const i32,
36805) -> __m512i {
36806 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36807 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
36808}
36809
36810/// Load packed 32-bit integers from memory into dst using zeromask k
36811/// (elements are zeroed out when the corresponding mask bit is not set).
36812/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36813///
36814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
36815#[inline]
36816#[target_feature(enable = "avx512f")]
36817#[cfg_attr(test, assert_instr(vmovdqa32))]
36818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36819#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36820pub const unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
36821 _mm512_mask_load_epi32(src:_mm512_setzero_si512(), k, mem_addr)
36822}
36823
36824/// Load packed 64-bit integers from memory into dst using writemask k
36825/// (elements are copied from src when the corresponding mask bit is not set).
36826/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36827///
36828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
36829#[inline]
36830#[target_feature(enable = "avx512f")]
36831#[cfg_attr(test, assert_instr(vmovdqa64))]
36832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36834pub const unsafe fn _mm512_mask_load_epi64(
36835 src: __m512i,
36836 k: __mmask8,
36837 mem_addr: *const i64,
36838) -> __m512i {
36839 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36840 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
36841}
36842
36843/// Load packed 64-bit integers from memory into dst using zeromask k
36844/// (elements are zeroed out when the corresponding mask bit is not set).
36845/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36846///
36847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
36848#[inline]
36849#[target_feature(enable = "avx512f")]
36850#[cfg_attr(test, assert_instr(vmovdqa64))]
36851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36853pub const unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
36854 _mm512_mask_load_epi64(src:_mm512_setzero_si512(), k, mem_addr)
36855}
36856
36857/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36858/// (elements are copied from src when the corresponding mask bit is not set).
36859/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36860///
36861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
36862#[inline]
36863#[target_feature(enable = "avx512f")]
36864#[cfg_attr(test, assert_instr(vmovaps))]
36865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36867pub const unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
36868 let mask: Simd = simd_select_bitmask(m:k, yes:i32x16::splat(!0), no:i32x16::ZERO);
36869 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
36870}
36871
36872/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
36873/// (elements are zeroed out when the corresponding mask bit is not set).
36874/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36875///
36876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
36877#[inline]
36878#[target_feature(enable = "avx512f")]
36879#[cfg_attr(test, assert_instr(vmovaps))]
36880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36881#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36882pub const unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
36883 _mm512_mask_load_ps(src:_mm512_setzero_ps(), k, mem_addr)
36884}
36885
36886/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
36887/// (elements are copied from src when the corresponding mask bit is not set).
36888/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36889///
36890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
36891#[inline]
36892#[target_feature(enable = "avx512f")]
36893#[cfg_attr(test, assert_instr(vmovapd))]
36894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36895#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36896pub const unsafe fn _mm512_mask_load_pd(
36897 src: __m512d,
36898 k: __mmask8,
36899 mem_addr: *const f64,
36900) -> __m512d {
36901 let mask: Simd = simd_select_bitmask(m:k, yes:i64x8::splat(!0), no:i64x8::ZERO);
36902 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
36903}
36904
36905/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
36906/// (elements are zeroed out when the corresponding mask bit is not set).
36907/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
36908///
36909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
36910#[inline]
36911#[target_feature(enable = "avx512f")]
36912#[cfg_attr(test, assert_instr(vmovapd))]
36913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36915pub const unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36916 _mm512_mask_load_pd(src:_mm512_setzero_pd(), k, mem_addr)
36917}
36918
36919/// Load packed 32-bit integers from memory into dst using writemask k
36920/// (elements are copied from src when the corresponding mask bit is not set).
36921/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
36924#[inline]
36925#[target_feature(enable = "avx512f,avx512vl")]
36926#[cfg_attr(test, assert_instr(vmovdqa32))]
36927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36929pub const unsafe fn _mm256_mask_load_epi32(
36930 src: __m256i,
36931 k: __mmask8,
36932 mem_addr: *const i32,
36933) -> __m256i {
36934 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36935 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
36936}
36937
36938/// Load packed 32-bit integers from memory into dst using zeromask k
36939/// (elements are zeroed out when the corresponding mask bit is not set).
36940/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
36943#[inline]
36944#[target_feature(enable = "avx512f,avx512vl")]
36945#[cfg_attr(test, assert_instr(vmovdqa32))]
36946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36948pub const unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
36949 _mm256_mask_load_epi32(src:_mm256_setzero_si256(), k, mem_addr)
36950}
36951
36952/// Load packed 64-bit integers from memory into dst using writemask k
36953/// (elements are copied from src when the corresponding mask bit is not set).
36954/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36955///
36956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
36957#[inline]
36958#[target_feature(enable = "avx512f,avx512vl")]
36959#[cfg_attr(test, assert_instr(vmovdqa64))]
36960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36962pub const unsafe fn _mm256_mask_load_epi64(
36963 src: __m256i,
36964 k: __mmask8,
36965 mem_addr: *const i64,
36966) -> __m256i {
36967 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
36968 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
36969}
36970
36971/// Load packed 64-bit integers from memory into dst using zeromask k
36972/// (elements are zeroed out when the corresponding mask bit is not set).
36973/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36974///
36975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
36976#[inline]
36977#[target_feature(enable = "avx512f,avx512vl")]
36978#[cfg_attr(test, assert_instr(vmovdqa64))]
36979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36981pub const unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
36982 _mm256_mask_load_epi64(src:_mm256_setzero_si256(), k, mem_addr)
36983}
36984
36985/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
36986/// (elements are copied from src when the corresponding mask bit is not set).
36987/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
36988///
36989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
36990#[inline]
36991#[target_feature(enable = "avx512f,avx512vl")]
36992#[cfg_attr(test, assert_instr(vmovaps))]
36993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
36995pub const unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36996 let mask: Simd = simd_select_bitmask(m:k, yes:i32x8::splat(!0), no:i32x8::ZERO);
36997 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
36998}
36999
37000/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
37001/// (elements are zeroed out when the corresponding mask bit is not set).
37002/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37003///
37004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
37005#[inline]
37006#[target_feature(enable = "avx512f,avx512vl")]
37007#[cfg_attr(test, assert_instr(vmovaps))]
37008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37010pub const unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
37011 _mm256_mask_load_ps(src:_mm256_setzero_ps(), k, mem_addr)
37012}
37013
37014/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
37015/// (elements are copied from src when the corresponding mask bit is not set).
37016/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37017///
37018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
37019#[inline]
37020#[target_feature(enable = "avx512f,avx512vl")]
37021#[cfg_attr(test, assert_instr(vmovapd))]
37022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37023#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37024pub const unsafe fn _mm256_mask_load_pd(
37025 src: __m256d,
37026 k: __mmask8,
37027 mem_addr: *const f64,
37028) -> __m256d {
37029 let mask: Simd = simd_select_bitmask(m:k, yes:i64x4::splat(!0), no:i64x4::ZERO);
37030 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
37031}
37032
37033/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
37034/// (elements are zeroed out when the corresponding mask bit is not set).
37035/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37036///
37037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
37038#[inline]
37039#[target_feature(enable = "avx512f,avx512vl")]
37040#[cfg_attr(test, assert_instr(vmovapd))]
37041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37042#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37043pub const unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
37044 _mm256_mask_load_pd(src:_mm256_setzero_pd(), k, mem_addr)
37045}
37046
37047/// Load packed 32-bit integers from memory into dst using writemask k
37048/// (elements are copied from src when the corresponding mask bit is not set).
37049/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37050///
37051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
37052#[inline]
37053#[target_feature(enable = "avx512f,avx512vl")]
37054#[cfg_attr(test, assert_instr(vmovdqa32))]
37055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37056#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37057pub const unsafe fn _mm_mask_load_epi32(
37058 src: __m128i,
37059 k: __mmask8,
37060 mem_addr: *const i32,
37061) -> __m128i {
37062 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
37063 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
37064}
37065
37066/// Load packed 32-bit integers from memory into dst using zeromask k
37067/// (elements are zeroed out when the corresponding mask bit is not set).
37068/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37069///
37070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
37071#[inline]
37072#[target_feature(enable = "avx512f,avx512vl")]
37073#[cfg_attr(test, assert_instr(vmovdqa32))]
37074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37075#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37076pub const unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
37077 _mm_mask_load_epi32(src:_mm_setzero_si128(), k, mem_addr)
37078}
37079
37080/// Load packed 64-bit integers from memory into dst using writemask k
37081/// (elements are copied from src when the corresponding mask bit is not set).
37082/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37083///
37084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
37085#[inline]
37086#[target_feature(enable = "avx512f,avx512vl")]
37087#[cfg_attr(test, assert_instr(vmovdqa64))]
37088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37089#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37090pub const unsafe fn _mm_mask_load_epi64(
37091 src: __m128i,
37092 k: __mmask8,
37093 mem_addr: *const i64,
37094) -> __m128i {
37095 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
37096 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
37097}
37098
37099/// Load packed 64-bit integers from memory into dst using zeromask k
37100/// (elements are zeroed out when the corresponding mask bit is not set).
37101/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37102///
37103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
37104#[inline]
37105#[target_feature(enable = "avx512f,avx512vl")]
37106#[cfg_attr(test, assert_instr(vmovdqa64))]
37107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37109pub const unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
37110 _mm_mask_load_epi64(src:_mm_setzero_si128(), k, mem_addr)
37111}
37112
37113/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
37114/// (elements are copied from src when the corresponding mask bit is not set).
37115/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37116///
37117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
37118#[inline]
37119#[target_feature(enable = "avx512f,avx512vl")]
37120#[cfg_attr(test, assert_instr(vmovaps))]
37121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37123pub const unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
37124 let mask: Simd = simd_select_bitmask(m:k, yes:i32x4::splat(!0), no:i32x4::ZERO);
37125 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
37126}
37127
37128/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
37129/// (elements are zeroed out when the corresponding mask bit is not set).
37130/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37131///
37132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
37133#[inline]
37134#[target_feature(enable = "avx512f,avx512vl")]
37135#[cfg_attr(test, assert_instr(vmovaps))]
37136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37138pub const unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
37139 _mm_mask_load_ps(src:_mm_setzero_ps(), k, mem_addr)
37140}
37141
37142/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
37143/// (elements are copied from src when the corresponding mask bit is not set).
37144/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37145///
37146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
37147#[inline]
37148#[target_feature(enable = "avx512f,avx512vl")]
37149#[cfg_attr(test, assert_instr(vmovapd))]
37150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37151#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37152pub const unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
37153 let mask: Simd = simd_select_bitmask(m:k, yes:i64x2::splat(!0), no:i64x2::ZERO);
37154 simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
37155}
37156
37157/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
37158/// (elements are zeroed out when the corresponding mask bit is not set).
37159/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37160///
37161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
37162#[inline]
37163#[target_feature(enable = "avx512f,avx512vl")]
37164#[cfg_attr(test, assert_instr(vmovapd))]
37165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37166#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37167pub const unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
37168 _mm_mask_load_pd(src:_mm_setzero_pd(), k, mem_addr)
37169}
37170
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    // Implemented with inline asm. Start `dst` at `src` so that when mask
    // bit 0 is clear, the merge-masked `vmovss` leaves `src`'s lower element.
    let mut dst: __m128 = src;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovss {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37192
/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
    // `dst` needs no initial value: the `{z}` (zero-masking) modifier makes the
    // instruction write every lane, so `out` (not `inout`) is correct here.
    let mut dst: __m128;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovss {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37214
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    // Implemented with inline asm. Start `dst` at `src` so that when mask
    // bit 0 is clear, the merge-masked `vmovsd` leaves `src`'s lower element.
    let mut dst: __m128d = src;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovsd {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37236
/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    // `dst` needs no initial value: the `{z}` (zero-masking) modifier makes the
    // instruction write every lane, so `out` (not `inout`) is correct here.
    let mut dst: __m128d;
    asm!(
        // NOTE(review): `vpl!` presumably appends the `, [{p}]` memory operand
        // to the template — confirm against the macro definition.
        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        // pure + readonly: the asm only reads from memory and has no other
        // observable side effects, so the compiler may CSE/elide calls.
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
37258
37259/// Store packed 32-bit integers from a into memory using writemask k.
37260/// mem_addr does not need to be aligned on any particular boundary.
37261///
37262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
37263#[inline]
37264#[target_feature(enable = "avx512f")]
37265#[cfg_attr(test, assert_instr(vmovdqu32))]
37266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37268pub const unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
37269 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37270 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
37271}
37272
37273/// Store packed 64-bit integers from a into memory using writemask k.
37274/// mem_addr does not need to be aligned on any particular boundary.
37275///
37276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
37277#[inline]
37278#[target_feature(enable = "avx512f")]
37279#[cfg_attr(test, assert_instr(vmovdqu64))]
37280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37282pub const unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
37283 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37284 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
37285}
37286
37287/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37288/// mem_addr does not need to be aligned on any particular boundary.
37289///
37290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
37291#[inline]
37292#[target_feature(enable = "avx512f")]
37293#[cfg_attr(test, assert_instr(vmovups))]
37294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37296pub const unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
37297 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37298 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
37299}
37300
37301/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37302/// mem_addr does not need to be aligned on any particular boundary.
37303///
37304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
37305#[inline]
37306#[target_feature(enable = "avx512f")]
37307#[cfg_attr(test, assert_instr(vmovupd))]
37308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37310pub const unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
37311 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37312 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
37313}
37314
37315/// Store packed 32-bit integers from a into memory using writemask k.
37316/// mem_addr does not need to be aligned on any particular boundary.
37317///
37318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
37319#[inline]
37320#[target_feature(enable = "avx512f,avx512vl")]
37321#[cfg_attr(test, assert_instr(vmovdqu32))]
37322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37323#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37324pub const unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
37325 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37326 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
37327}
37328
37329/// Store packed 64-bit integers from a into memory using writemask k.
37330/// mem_addr does not need to be aligned on any particular boundary.
37331///
37332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
37333#[inline]
37334#[target_feature(enable = "avx512f,avx512vl")]
37335#[cfg_attr(test, assert_instr(vmovdqu64))]
37336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37337#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37338pub const unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
37339 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37340 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
37341}
37342
37343/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37344/// mem_addr does not need to be aligned on any particular boundary.
37345///
37346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
37347#[inline]
37348#[target_feature(enable = "avx512f,avx512vl")]
37349#[cfg_attr(test, assert_instr(vmovups))]
37350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37352pub const unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
37353 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37354 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
37355}
37356
37357/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37358/// mem_addr does not need to be aligned on any particular boundary.
37359///
37360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
37361#[inline]
37362#[target_feature(enable = "avx512f,avx512vl")]
37363#[cfg_attr(test, assert_instr(vmovupd))]
37364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37365#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37366pub const unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
37367 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37368 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
37369}
37370
37371/// Store packed 32-bit integers from a into memory using writemask k.
37372/// mem_addr does not need to be aligned on any particular boundary.
37373///
37374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
37375#[inline]
37376#[target_feature(enable = "avx512f,avx512vl")]
37377#[cfg_attr(test, assert_instr(vmovdqu32))]
37378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37380pub const unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
37381 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37382 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
37383}
37384
37385/// Store packed 64-bit integers from a into memory using writemask k.
37386/// mem_addr does not need to be aligned on any particular boundary.
37387///
37388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
37389#[inline]
37390#[target_feature(enable = "avx512f,avx512vl")]
37391#[cfg_attr(test, assert_instr(vmovdqu64))]
37392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37394pub const unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
37395 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37396 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
37397}
37398
37399/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37400/// mem_addr does not need to be aligned on any particular boundary.
37401///
37402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
37403#[inline]
37404#[target_feature(enable = "avx512f,avx512vl")]
37405#[cfg_attr(test, assert_instr(vmovups))]
37406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37408pub const unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
37409 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37410 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
37411}
37412
37413/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37414/// mem_addr does not need to be aligned on any particular boundary.
37415///
37416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
37417#[inline]
37418#[target_feature(enable = "avx512f,avx512vl")]
37419#[cfg_attr(test, assert_instr(vmovupd))]
37420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37421#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37422pub const unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
37423 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37424 simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
37425}
37426
37427/// Store packed 32-bit integers from a into memory using writemask k.
37428/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37429///
37430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
37431#[inline]
37432#[target_feature(enable = "avx512f")]
37433#[cfg_attr(test, assert_instr(vmovdqa32))]
37434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37436pub const unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
37437 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37438 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
37439}
37440
37441/// Store packed 64-bit integers from a into memory using writemask k.
37442/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37443///
37444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
37445#[inline]
37446#[target_feature(enable = "avx512f")]
37447#[cfg_attr(test, assert_instr(vmovdqa64))]
37448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37449#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37450pub const unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
37451 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37452 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
37453}
37454
37455/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37456/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37457///
37458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
37459#[inline]
37460#[target_feature(enable = "avx512f")]
37461#[cfg_attr(test, assert_instr(vmovaps))]
37462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37464pub const unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
37465 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x16::splat(!0), no:i32x16::ZERO);
37466 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
37467}
37468
37469/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37470/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
37471///
37472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
37473#[inline]
37474#[target_feature(enable = "avx512f")]
37475#[cfg_attr(test, assert_instr(vmovapd))]
37476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37478pub const unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
37479 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x8::splat(!0), no:i64x8::ZERO);
37480 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
37481}
37482
37483/// Store packed 32-bit integers from a into memory using writemask k.
37484/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37485///
37486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
37487#[inline]
37488#[target_feature(enable = "avx512f,avx512vl")]
37489#[cfg_attr(test, assert_instr(vmovdqa32))]
37490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37492pub const unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
37493 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37494 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
37495}
37496
37497/// Store packed 64-bit integers from a into memory using writemask k.
37498/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37499///
37500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
37501#[inline]
37502#[target_feature(enable = "avx512f,avx512vl")]
37503#[cfg_attr(test, assert_instr(vmovdqa64))]
37504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37506pub const unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
37507 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37508 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
37509}
37510
37511/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37512/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37513///
37514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
37515#[inline]
37516#[target_feature(enable = "avx512f,avx512vl")]
37517#[cfg_attr(test, assert_instr(vmovaps))]
37518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37520pub const unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
37521 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x8::splat(!0), no:i32x8::ZERO);
37522 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
37523}
37524
37525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37526/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
37527///
37528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
37529#[inline]
37530#[target_feature(enable = "avx512f,avx512vl")]
37531#[cfg_attr(test, assert_instr(vmovapd))]
37532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37534pub const unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
37535 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x4::splat(!0), no:i64x4::ZERO);
37536 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
37537}
37538
37539/// Store packed 32-bit integers from a into memory using writemask k.
37540/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
37543#[inline]
37544#[target_feature(enable = "avx512f,avx512vl")]
37545#[cfg_attr(test, assert_instr(vmovdqa32))]
37546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37548pub const unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
37549 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37550 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
37551}
37552
37553/// Store packed 64-bit integers from a into memory using writemask k.
37554/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37555///
37556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
37557#[inline]
37558#[target_feature(enable = "avx512f,avx512vl")]
37559#[cfg_attr(test, assert_instr(vmovdqa64))]
37560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37562pub const unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
37563 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37564 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
37565}
37566
37567/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37568/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37569///
37570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
37571#[inline]
37572#[target_feature(enable = "avx512f,avx512vl")]
37573#[cfg_attr(test, assert_instr(vmovaps))]
37574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37575#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37576pub const unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
37577 let mask: Simd = simd_select_bitmask(m:mask, yes:i32x4::splat(!0), no:i32x4::ZERO);
37578 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
37579}
37580
37581/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37582/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
37583///
37584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
37585#[inline]
37586#[target_feature(enable = "avx512f,avx512vl")]
37587#[cfg_attr(test, assert_instr(vmovapd))]
37588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37590pub const unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
37591 let mask: Simd = simd_select_bitmask(m:mask, yes:i64x2::splat(!0), no:i64x2::ZERO);
37592 simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
37593}
37594
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
    // Implemented with inline asm because there is no LLVM intrinsic for a
    // masked scalar store; `vmovss` with a `{k}` writemask only touches memory
    // when bit 0 of `k` is set.
    asm!(
        vps!("vmovss", "{{{k}}}, {a}"),
        // `vps!` presumably emits the memory operand from `{p}` — the pointer
        // register bound below; NOTE(review): confirm against the macro def.
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37612
/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
    // Inline asm: `vmovsd` with a `{k}` writemask stores the low f64 of `a`
    // only when bit 0 of `k` is set; memory is untouched otherwise.
    asm!(
        vps!("vmovsd", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
37630
37631/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37632///
37633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
37634#[inline]
37635#[target_feature(enable = "avx512f")]
37636#[cfg_attr(test, assert_instr(vpexpandd))]
37637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37638pub unsafe fn _mm512_mask_expandloadu_epi32(
37639 src: __m512i,
37640 k: __mmask16,
37641 mem_addr: *const i32,
37642) -> __m512i {
37643 transmute(src:expandloadd_512(mem_addr, a:src.as_i32x16(), mask:k))
37644}
37645
37646/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37647///
37648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
37649#[inline]
37650#[target_feature(enable = "avx512f")]
37651#[cfg_attr(test, assert_instr(vpexpandd))]
37652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37653pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
37654 _mm512_mask_expandloadu_epi32(src:_mm512_setzero_si512(), k, mem_addr)
37655}
37656
37657/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37658///
37659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
37660#[inline]
37661#[target_feature(enable = "avx512f,avx512vl")]
37662#[cfg_attr(test, assert_instr(vpexpandd))]
37663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37664pub unsafe fn _mm256_mask_expandloadu_epi32(
37665 src: __m256i,
37666 k: __mmask8,
37667 mem_addr: *const i32,
37668) -> __m256i {
37669 transmute(src:expandloadd_256(mem_addr, a:src.as_i32x8(), mask:k))
37670}
37671
37672/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37673///
37674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
37675#[inline]
37676#[target_feature(enable = "avx512f,avx512vl")]
37677#[cfg_attr(test, assert_instr(vpexpandd))]
37678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37679pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
37680 _mm256_mask_expandloadu_epi32(src:_mm256_setzero_si256(), k, mem_addr)
37681}
37682
37683/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37684///
37685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
37686#[inline]
37687#[target_feature(enable = "avx512f,avx512vl")]
37688#[cfg_attr(test, assert_instr(vpexpandd))]
37689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37690pub unsafe fn _mm_mask_expandloadu_epi32(
37691 src: __m128i,
37692 k: __mmask8,
37693 mem_addr: *const i32,
37694) -> __m128i {
37695 transmute(src:expandloadd_128(mem_addr, a:src.as_i32x4(), mask:k))
37696}
37697
37698/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
37701#[inline]
37702#[target_feature(enable = "avx512f,avx512vl")]
37703#[cfg_attr(test, assert_instr(vpexpandd))]
37704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37705pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
37706 _mm_mask_expandloadu_epi32(src:_mm_setzero_si128(), k, mem_addr)
37707}
37708
37709/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37710///
37711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
37712#[inline]
37713#[target_feature(enable = "avx512f")]
37714#[cfg_attr(test, assert_instr(vpexpandq))]
37715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37716pub unsafe fn _mm512_mask_expandloadu_epi64(
37717 src: __m512i,
37718 k: __mmask8,
37719 mem_addr: *const i64,
37720) -> __m512i {
37721 transmute(src:expandloadq_512(mem_addr, a:src.as_i64x8(), mask:k))
37722}
37723
37724/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37725///
37726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
37727#[inline]
37728#[target_feature(enable = "avx512f")]
37729#[cfg_attr(test, assert_instr(vpexpandq))]
37730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37731pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
37732 _mm512_mask_expandloadu_epi64(src:_mm512_setzero_si512(), k, mem_addr)
37733}
37734
37735/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37736///
37737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
37738#[inline]
37739#[target_feature(enable = "avx512f,avx512vl")]
37740#[cfg_attr(test, assert_instr(vpexpandq))]
37741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37742pub unsafe fn _mm256_mask_expandloadu_epi64(
37743 src: __m256i,
37744 k: __mmask8,
37745 mem_addr: *const i64,
37746) -> __m256i {
37747 transmute(src:expandloadq_256(mem_addr, a:src.as_i64x4(), mask:k))
37748}
37749
37750/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37751///
37752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
37753#[inline]
37754#[target_feature(enable = "avx512f,avx512vl")]
37755#[cfg_attr(test, assert_instr(vpexpandq))]
37756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37757pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
37758 _mm256_mask_expandloadu_epi64(src:_mm256_setzero_si256(), k, mem_addr)
37759}
37760
37761/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37762///
37763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
37764#[inline]
37765#[target_feature(enable = "avx512f,avx512vl")]
37766#[cfg_attr(test, assert_instr(vpexpandq))]
37767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37768pub unsafe fn _mm_mask_expandloadu_epi64(
37769 src: __m128i,
37770 k: __mmask8,
37771 mem_addr: *const i64,
37772) -> __m128i {
37773 transmute(src:expandloadq_128(mem_addr, a:src.as_i64x2(), mask:k))
37774}
37775
37776/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37777///
37778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
37779#[inline]
37780#[target_feature(enable = "avx512f,avx512vl")]
37781#[cfg_attr(test, assert_instr(vpexpandq))]
37782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37783pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
37784 _mm_mask_expandloadu_epi64(src:_mm_setzero_si128(), k, mem_addr)
37785}
37786
37787/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37788///
37789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
37790#[inline]
37791#[target_feature(enable = "avx512f")]
37792#[cfg_attr(test, assert_instr(vexpandps))]
37793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37794pub unsafe fn _mm512_mask_expandloadu_ps(
37795 src: __m512,
37796 k: __mmask16,
37797 mem_addr: *const f32,
37798) -> __m512 {
37799 transmute(src:expandloadps_512(mem_addr, a:src.as_f32x16(), mask:k))
37800}
37801
37802/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37803///
37804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
37805#[inline]
37806#[target_feature(enable = "avx512f")]
37807#[cfg_attr(test, assert_instr(vexpandps))]
37808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37809pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
37810 _mm512_mask_expandloadu_ps(src:_mm512_setzero_ps(), k, mem_addr)
37811}
37812
37813/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37814///
37815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
37816#[inline]
37817#[target_feature(enable = "avx512f,avx512vl")]
37818#[cfg_attr(test, assert_instr(vexpandps))]
37819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37820pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
37821 transmute(src:expandloadps_256(mem_addr, a:src.as_f32x8(), mask:k))
37822}
37823
37824/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37825///
37826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
37827#[inline]
37828#[target_feature(enable = "avx512f,avx512vl")]
37829#[cfg_attr(test, assert_instr(vexpandps))]
37830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37831pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
37832 _mm256_mask_expandloadu_ps(src:_mm256_setzero_ps(), k, mem_addr)
37833}
37834
37835/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37836///
37837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
37838#[inline]
37839#[target_feature(enable = "avx512f,avx512vl")]
37840#[cfg_attr(test, assert_instr(vexpandps))]
37841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37842pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
37843 transmute(src:expandloadps_128(mem_addr, a:src.as_f32x4(), mask:k))
37844}
37845
37846/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37847///
37848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
37849#[inline]
37850#[target_feature(enable = "avx512f,avx512vl")]
37851#[cfg_attr(test, assert_instr(vexpandps))]
37852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37853pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
37854 _mm_mask_expandloadu_ps(src:_mm_setzero_ps(), k, mem_addr)
37855}
37856
37857/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37858///
37859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
37860#[inline]
37861#[target_feature(enable = "avx512f")]
37862#[cfg_attr(test, assert_instr(vexpandpd))]
37863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37864pub unsafe fn _mm512_mask_expandloadu_pd(
37865 src: __m512d,
37866 k: __mmask8,
37867 mem_addr: *const f64,
37868) -> __m512d {
37869 transmute(src:expandloadpd_512(mem_addr, a:src.as_f64x8(), mask:k))
37870}
37871
37872/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37873///
37874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
37875#[inline]
37876#[target_feature(enable = "avx512f")]
37877#[cfg_attr(test, assert_instr(vexpandpd))]
37878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37879pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
37880 _mm512_mask_expandloadu_pd(src:_mm512_setzero_pd(), k, mem_addr)
37881}
37882
37883/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37884///
37885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
37886#[inline]
37887#[target_feature(enable = "avx512f,avx512vl")]
37888#[cfg_attr(test, assert_instr(vexpandpd))]
37889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37890pub unsafe fn _mm256_mask_expandloadu_pd(
37891 src: __m256d,
37892 k: __mmask8,
37893 mem_addr: *const f64,
37894) -> __m256d {
37895 transmute(src:expandloadpd_256(mem_addr, a:src.as_f64x4(), mask:k))
37896}
37897
37898/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37899///
37900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
37901#[inline]
37902#[target_feature(enable = "avx512f,avx512vl")]
37903#[cfg_attr(test, assert_instr(vexpandpd))]
37904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37905pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
37906 _mm256_mask_expandloadu_pd(src:_mm256_setzero_pd(), k, mem_addr)
37907}
37908
37909/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
37910///
37911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
37912#[inline]
37913#[target_feature(enable = "avx512f,avx512vl")]
37914#[cfg_attr(test, assert_instr(vexpandpd))]
37915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37916pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
37917 transmute(src:expandloadpd_128(mem_addr, a:src.as_f64x2(), mask:k))
37918}
37919
37920/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
37921///
37922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
37923#[inline]
37924#[target_feature(enable = "avx512f,avx512vl")]
37925#[cfg_attr(test, assert_instr(vexpandpd))]
37926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37927pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
37928 _mm_mask_expandloadu_pd(src:_mm_setzero_pd(), k, mem_addr)
37929}
37930
37931/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
37932///
37933/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
37934#[inline]
37935#[target_feature(enable = "avx512f")]
37936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37937#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37938pub const fn _mm512_setr_pd(
37939 e0: f64,
37940 e1: f64,
37941 e2: f64,
37942 e3: f64,
37943 e4: f64,
37944 e5: f64,
37945 e6: f64,
37946 e7: f64,
37947) -> __m512d {
37948 unsafe {
37949 let r: Simd = f64x8::new(x0:e0, x1:e1, x2:e2, x3:e3, x4:e4, x5:e5, x6:e6, x7:e7);
37950 transmute(src:r)
37951 }
37952}
37953
37954/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
37955///
37956/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
37957#[inline]
37958#[target_feature(enable = "avx512f")]
37959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37961pub const fn _mm512_set_pd(
37962 e0: f64,
37963 e1: f64,
37964 e2: f64,
37965 e3: f64,
37966 e4: f64,
37967 e5: f64,
37968 e6: f64,
37969 e7: f64,
37970) -> __m512d {
37971 _mm512_setr_pd(e0:e7, e1:e6, e2:e5, e3:e4, e4:e3, e5:e2, e6:e1, e7:e0)
37972}
37973
37974/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37980#[cfg_attr(test, assert_instr(vmovss))]
37981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37982pub const fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983 unsafe {
37984 let extractsrc: f32 = simd_extract!(src, 0);
37985 let mut mov: f32 = extractsrc;
37986 if (k & 0b00000001) != 0 {
37987 mov = simd_extract!(b, 0);
37988 }
37989 simd_insert!(a, 0, mov)
37990 }
37991}
37992
37993/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37994///
37995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
37996#[inline]
37997#[target_feature(enable = "avx512f")]
37998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37999#[cfg_attr(test, assert_instr(vmovss))]
38000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38001pub const fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38002 unsafe {
38003 let mut mov: f32 = 0.;
38004 if (k & 0b00000001) != 0 {
38005 mov = simd_extract!(b, 0);
38006 }
38007 simd_insert!(a, 0, mov)
38008 }
38009}
38010
38011/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38012///
38013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
38014#[inline]
38015#[target_feature(enable = "avx512f")]
38016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38017#[cfg_attr(test, assert_instr(vmovsd))]
38018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38019pub const fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38020 unsafe {
38021 let extractsrc: f64 = simd_extract!(src, 0);
38022 let mut mov: f64 = extractsrc;
38023 if (k & 0b00000001) != 0 {
38024 mov = simd_extract!(b, 0);
38025 }
38026 simd_insert!(a, 0, mov)
38027 }
38028}
38029
38030/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38031///
38032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
38033#[inline]
38034#[target_feature(enable = "avx512f")]
38035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38036#[cfg_attr(test, assert_instr(vmovsd))]
38037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38038pub const fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38039 unsafe {
38040 let mut mov: f64 = 0.;
38041 if (k & 0b00000001) != 0 {
38042 mov = simd_extract!(b, 0);
38043 }
38044 simd_insert!(a, 0, mov)
38045 }
38046}
38047
38048/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38049///
38050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
38051#[inline]
38052#[target_feature(enable = "avx512f")]
38053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38054#[cfg_attr(test, assert_instr(vaddss))]
38055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38056pub const fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38057 unsafe {
38058 let extractsrc: f32 = simd_extract!(src, 0);
38059 let mut add: f32 = extractsrc;
38060 if (k & 0b00000001) != 0 {
38061 let extracta: f32 = simd_extract!(a, 0);
38062 let extractb: f32 = simd_extract!(b, 0);
38063 add = extracta + extractb;
38064 }
38065 simd_insert!(a, 0, add)
38066 }
38067}
38068
38069/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38070///
38071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
38072#[inline]
38073#[target_feature(enable = "avx512f")]
38074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38075#[cfg_attr(test, assert_instr(vaddss))]
38076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38077pub const fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38078 unsafe {
38079 let mut add: f32 = 0.;
38080 if (k & 0b00000001) != 0 {
38081 let extracta: f32 = simd_extract!(a, 0);
38082 let extractb: f32 = simd_extract!(b, 0);
38083 add = extracta + extractb;
38084 }
38085 simd_insert!(a, 0, add)
38086 }
38087}
38088
38089/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38090///
38091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
38092#[inline]
38093#[target_feature(enable = "avx512f")]
38094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38095#[cfg_attr(test, assert_instr(vaddsd))]
38096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38097pub const fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38098 unsafe {
38099 let extractsrc: f64 = simd_extract!(src, 0);
38100 let mut add: f64 = extractsrc;
38101 if (k & 0b00000001) != 0 {
38102 let extracta: f64 = simd_extract!(a, 0);
38103 let extractb: f64 = simd_extract!(b, 0);
38104 add = extracta + extractb;
38105 }
38106 simd_insert!(a, 0, add)
38107 }
38108}
38109
38110/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38111///
38112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
38113#[inline]
38114#[target_feature(enable = "avx512f")]
38115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38116#[cfg_attr(test, assert_instr(vaddsd))]
38117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38118pub const fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38119 unsafe {
38120 let mut add: f64 = 0.;
38121 if (k & 0b00000001) != 0 {
38122 let extracta: f64 = simd_extract!(a, 0);
38123 let extractb: f64 = simd_extract!(b, 0);
38124 add = extracta + extractb;
38125 }
38126 simd_insert!(a, 0, add)
38127 }
38128}
38129
38130/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38131///
38132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
38133#[inline]
38134#[target_feature(enable = "avx512f")]
38135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38136#[cfg_attr(test, assert_instr(vsubss))]
38137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38138pub const fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38139 unsafe {
38140 let extractsrc: f32 = simd_extract!(src, 0);
38141 let mut add: f32 = extractsrc;
38142 if (k & 0b00000001) != 0 {
38143 let extracta: f32 = simd_extract!(a, 0);
38144 let extractb: f32 = simd_extract!(b, 0);
38145 add = extracta - extractb;
38146 }
38147 simd_insert!(a, 0, add)
38148 }
38149}
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38152///
38153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
38154#[inline]
38155#[target_feature(enable = "avx512f")]
38156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38157#[cfg_attr(test, assert_instr(vsubss))]
38158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38159pub const fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38160 unsafe {
38161 let mut add: f32 = 0.;
38162 if (k & 0b00000001) != 0 {
38163 let extracta: f32 = simd_extract!(a, 0);
38164 let extractb: f32 = simd_extract!(b, 0);
38165 add = extracta - extractb;
38166 }
38167 simd_insert!(a, 0, add)
38168 }
38169}
38170
38171/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38172///
38173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
38174#[inline]
38175#[target_feature(enable = "avx512f")]
38176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38177#[cfg_attr(test, assert_instr(vsubsd))]
38178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38179pub const fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38180 unsafe {
38181 let extractsrc: f64 = simd_extract!(src, 0);
38182 let mut add: f64 = extractsrc;
38183 if (k & 0b00000001) != 0 {
38184 let extracta: f64 = simd_extract!(a, 0);
38185 let extractb: f64 = simd_extract!(b, 0);
38186 add = extracta - extractb;
38187 }
38188 simd_insert!(a, 0, add)
38189 }
38190}
38191
38192/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38193///
38194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
38195#[inline]
38196#[target_feature(enable = "avx512f")]
38197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38198#[cfg_attr(test, assert_instr(vsubsd))]
38199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38200pub const fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38201 unsafe {
38202 let mut add: f64 = 0.;
38203 if (k & 0b00000001) != 0 {
38204 let extracta: f64 = simd_extract!(a, 0);
38205 let extractb: f64 = simd_extract!(b, 0);
38206 add = extracta - extractb;
38207 }
38208 simd_insert!(a, 0, add)
38209 }
38210}
38211
38212/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38213///
38214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
38215#[inline]
38216#[target_feature(enable = "avx512f")]
38217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38218#[cfg_attr(test, assert_instr(vmulss))]
38219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38220pub const fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38221 unsafe {
38222 let extractsrc: f32 = simd_extract!(src, 0);
38223 let mut add: f32 = extractsrc;
38224 if (k & 0b00000001) != 0 {
38225 let extracta: f32 = simd_extract!(a, 0);
38226 let extractb: f32 = simd_extract!(b, 0);
38227 add = extracta * extractb;
38228 }
38229 simd_insert!(a, 0, add)
38230 }
38231}
38232
38233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38234///
38235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
38236#[inline]
38237#[target_feature(enable = "avx512f")]
38238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38239#[cfg_attr(test, assert_instr(vmulss))]
38240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38241pub const fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38242 unsafe {
38243 let mut add: f32 = 0.;
38244 if (k & 0b00000001) != 0 {
38245 let extracta: f32 = simd_extract!(a, 0);
38246 let extractb: f32 = simd_extract!(b, 0);
38247 add = extracta * extractb;
38248 }
38249 simd_insert!(a, 0, add)
38250 }
38251}
38252
38253/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38254///
38255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
38256#[inline]
38257#[target_feature(enable = "avx512f")]
38258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38259#[cfg_attr(test, assert_instr(vmulsd))]
38260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38261pub const fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38262 unsafe {
38263 let extractsrc: f64 = simd_extract!(src, 0);
38264 let mut add: f64 = extractsrc;
38265 if (k & 0b00000001) != 0 {
38266 let extracta: f64 = simd_extract!(a, 0);
38267 let extractb: f64 = simd_extract!(b, 0);
38268 add = extracta * extractb;
38269 }
38270 simd_insert!(a, 0, add)
38271 }
38272}
38273
38274/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38275///
38276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
38277#[inline]
38278#[target_feature(enable = "avx512f")]
38279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38280#[cfg_attr(test, assert_instr(vmulsd))]
38281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38282pub const fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38283 unsafe {
38284 let mut add: f64 = 0.;
38285 if (k & 0b00000001) != 0 {
38286 let extracta: f64 = simd_extract!(a, 0);
38287 let extractb: f64 = simd_extract!(b, 0);
38288 add = extracta * extractb;
38289 }
38290 simd_insert!(a, 0, add)
38291 }
38292}
38293
38294/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38295///
38296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
38297#[inline]
38298#[target_feature(enable = "avx512f")]
38299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38300#[cfg_attr(test, assert_instr(vdivss))]
38301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38302pub const fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38303 unsafe {
38304 let extractsrc: f32 = simd_extract!(src, 0);
38305 let mut add: f32 = extractsrc;
38306 if (k & 0b00000001) != 0 {
38307 let extracta: f32 = simd_extract!(a, 0);
38308 let extractb: f32 = simd_extract!(b, 0);
38309 add = extracta / extractb;
38310 }
38311 simd_insert!(a, 0, add)
38312 }
38313}
38314
38315/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38316///
38317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
38318#[inline]
38319#[target_feature(enable = "avx512f")]
38320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38321#[cfg_attr(test, assert_instr(vdivss))]
38322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38323pub const fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38324 unsafe {
38325 let mut add: f32 = 0.;
38326 if (k & 0b00000001) != 0 {
38327 let extracta: f32 = simd_extract!(a, 0);
38328 let extractb: f32 = simd_extract!(b, 0);
38329 add = extracta / extractb;
38330 }
38331 simd_insert!(a, 0, add)
38332 }
38333}
38334
38335/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38336///
38337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
38338#[inline]
38339#[target_feature(enable = "avx512f")]
38340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38341#[cfg_attr(test, assert_instr(vdivsd))]
38342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38343pub const fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38344 unsafe {
38345 let extractsrc: f64 = simd_extract!(src, 0);
38346 let mut add: f64 = extractsrc;
38347 if (k & 0b00000001) != 0 {
38348 let extracta: f64 = simd_extract!(a, 0);
38349 let extractb: f64 = simd_extract!(b, 0);
38350 add = extracta / extractb;
38351 }
38352 simd_insert!(a, 0, add)
38353 }
38354}
38355
38356/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38357///
38358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
38359#[inline]
38360#[target_feature(enable = "avx512f")]
38361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38362#[cfg_attr(test, assert_instr(vdivsd))]
38363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
38364pub const fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38365 unsafe {
38366 let mut add: f64 = 0.;
38367 if (k & 0b00000001) != 0 {
38368 let extracta: f64 = simd_extract!(a, 0);
38369 let extractb: f64 = simd_extract!(b, 0);
38370 add = extracta / extractb;
38371 }
38372 simd_insert!(a, 0, add)
38373 }
38374}
38375
38376/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmaxss))]
38383pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38384 unsafe {
38385 transmute(src:vmaxss(
38386 a.as_f32x4(),
38387 b.as_f32x4(),
38388 src.as_f32x4(),
38389 mask:k,
38390 _MM_FROUND_CUR_DIRECTION,
38391 ))
38392 }
38393}
38394
38395/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38396///
38397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
38398#[inline]
38399#[target_feature(enable = "avx512f")]
38400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38401#[cfg_attr(test, assert_instr(vmaxss))]
38402pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38403 unsafe {
38404 transmute(src:vmaxss(
38405 a.as_f32x4(),
38406 b.as_f32x4(),
38407 src:f32x4::ZERO,
38408 mask:k,
38409 _MM_FROUND_CUR_DIRECTION,
38410 ))
38411 }
38412}
38413
38414/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38415///
38416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
38417#[inline]
38418#[target_feature(enable = "avx512f")]
38419#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38420#[cfg_attr(test, assert_instr(vmaxsd))]
38421pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38422 unsafe {
38423 transmute(src:vmaxsd(
38424 a.as_f64x2(),
38425 b.as_f64x2(),
38426 src.as_f64x2(),
38427 mask:k,
38428 _MM_FROUND_CUR_DIRECTION,
38429 ))
38430 }
38431}
38432
38433/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38434///
38435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
38436#[inline]
38437#[target_feature(enable = "avx512f")]
38438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38439#[cfg_attr(test, assert_instr(vmaxsd))]
38440pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38441 unsafe {
38442 transmute(src:vmaxsd(
38443 a.as_f64x2(),
38444 b.as_f64x2(),
38445 src:f64x2::ZERO,
38446 mask:k,
38447 _MM_FROUND_CUR_DIRECTION,
38448 ))
38449 }
38450}
38451
38452/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38453///
38454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
38455#[inline]
38456#[target_feature(enable = "avx512f")]
38457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38458#[cfg_attr(test, assert_instr(vminss))]
38459pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38460 unsafe {
38461 transmute(src:vminss(
38462 a.as_f32x4(),
38463 b.as_f32x4(),
38464 src.as_f32x4(),
38465 mask:k,
38466 _MM_FROUND_CUR_DIRECTION,
38467 ))
38468 }
38469}
38470
38471/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38472///
38473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
38474#[inline]
38475#[target_feature(enable = "avx512f")]
38476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38477#[cfg_attr(test, assert_instr(vminss))]
38478pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38479 unsafe {
38480 transmute(src:vminss(
38481 a.as_f32x4(),
38482 b.as_f32x4(),
38483 src:f32x4::ZERO,
38484 mask:k,
38485 _MM_FROUND_CUR_DIRECTION,
38486 ))
38487 }
38488}
38489
38490/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38491///
38492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
38493#[inline]
38494#[target_feature(enable = "avx512f")]
38495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38496#[cfg_attr(test, assert_instr(vminsd))]
38497pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38498 unsafe {
38499 transmute(src:vminsd(
38500 a.as_f64x2(),
38501 b.as_f64x2(),
38502 src.as_f64x2(),
38503 mask:k,
38504 _MM_FROUND_CUR_DIRECTION,
38505 ))
38506 }
38507}
38508
38509/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38510///
38511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
38512#[inline]
38513#[target_feature(enable = "avx512f")]
38514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38515#[cfg_attr(test, assert_instr(vminsd))]
38516pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38517 unsafe {
38518 transmute(src:vminsd(
38519 a.as_f64x2(),
38520 b.as_f64x2(),
38521 src:f64x2::ZERO,
38522 mask:k,
38523 _MM_FROUND_CUR_DIRECTION,
38524 ))
38525 }
38526}
38527
38528/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38529///
38530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
38531#[inline]
38532#[target_feature(enable = "avx512f")]
38533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38534#[cfg_attr(test, assert_instr(vsqrtss))]
38535pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38536 unsafe { vsqrtss(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION) }
38537}
38538
38539/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38540///
38541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
38542#[inline]
38543#[target_feature(enable = "avx512f")]
38544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38545#[cfg_attr(test, assert_instr(vsqrtss))]
38546pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547 unsafe { vsqrtss(a, b, src:_mm_setzero_ps(), mask:k, _MM_FROUND_CUR_DIRECTION) }
38548}
38549
38550/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38551///
38552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
38553#[inline]
38554#[target_feature(enable = "avx512f")]
38555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38556#[cfg_attr(test, assert_instr(vsqrtsd))]
38557pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38558 unsafe { vsqrtsd(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION) }
38559}
38560
38561/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38562///
38563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
38564#[inline]
38565#[target_feature(enable = "avx512f")]
38566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38567#[cfg_attr(test, assert_instr(vsqrtsd))]
38568pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38569 unsafe { vsqrtsd(a, b, src:_mm_setzero_pd(), mask:k, _MM_FROUND_CUR_DIRECTION) }
38570}
38571
38572/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38573///
38574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
38575#[inline]
38576#[target_feature(enable = "avx512f")]
38577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38578#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38579pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
38580 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:0b1)) }
38581}
38582
38583/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38584///
38585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
38586#[inline]
38587#[target_feature(enable = "avx512f")]
38588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38589#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38590pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38591 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
38592}
38593
38594/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38595///
38596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
38597#[inline]
38598#[target_feature(enable = "avx512f")]
38599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38600#[cfg_attr(test, assert_instr(vrsqrt14ss))]
38601pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38602 unsafe { transmute(src:vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
38603}
38604
38605/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38606///
38607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
38608#[inline]
38609#[target_feature(enable = "avx512f")]
38610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38611#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38612pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
38613 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:0b1)) }
38614}
38615
38616/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38617///
38618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
38619#[inline]
38620#[target_feature(enable = "avx512f")]
38621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38622#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38623pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38624 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
38625}
38626
38627/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38628///
38629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
38630#[inline]
38631#[target_feature(enable = "avx512f")]
38632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38633#[cfg_attr(test, assert_instr(vrsqrt14sd))]
38634pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38635 unsafe { transmute(src:vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
38636}
38637
38638/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vrcp14ss))]
38645pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
38646 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:0b1)) }
38647}
38648
38649/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38650///
38651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
38652#[inline]
38653#[target_feature(enable = "avx512f")]
38654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38655#[cfg_attr(test, assert_instr(vrcp14ss))]
38656pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38657 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), mask:k)) }
38658}
38659
38660/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
38661///
38662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
38663#[inline]
38664#[target_feature(enable = "avx512f")]
38665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38666#[cfg_attr(test, assert_instr(vrcp14ss))]
38667pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38668 unsafe { transmute(src:vrcp14ss(a.as_f32x4(), b.as_f32x4(), src:f32x4::ZERO, mask:k)) }
38669}
38670
38671/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38672///
38673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
38674#[inline]
38675#[target_feature(enable = "avx512f")]
38676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38677#[cfg_attr(test, assert_instr(vrcp14sd))]
38678pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
38679 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:0b1)) }
38680}
38681
38682/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vrcp14sd))]
38689pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38690 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), mask:k)) }
38691}
38692
38693/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
38694///
38695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
38696#[inline]
38697#[target_feature(enable = "avx512f")]
38698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38699#[cfg_attr(test, assert_instr(vrcp14sd))]
38700pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38701 unsafe { transmute(src:vrcp14sd(a.as_f64x2(), b.as_f64x2(), src:f64x2::ZERO, mask:k)) }
38702}
38703
38704/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38705///
38706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
38707#[inline]
38708#[target_feature(enable = "avx512f")]
38709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38710#[cfg_attr(test, assert_instr(vgetexpss))]
38711pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
38712 unsafe {
38713 transmute(src:vgetexpss(
38714 a.as_f32x4(),
38715 b.as_f32x4(),
38716 src:f32x4::ZERO,
38717 mask:0b1,
38718 _MM_FROUND_NO_EXC,
38719 ))
38720 }
38721}
38722
38723/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38724///
38725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
38726#[inline]
38727#[target_feature(enable = "avx512f")]
38728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38729#[cfg_attr(test, assert_instr(vgetexpss))]
38730pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38731 unsafe {
38732 transmute(src:vgetexpss(
38733 a.as_f32x4(),
38734 b.as_f32x4(),
38735 src.as_f32x4(),
38736 mask:k,
38737 _MM_FROUND_NO_EXC,
38738 ))
38739 }
38740}
38741
38742/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38743///
38744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
38745#[inline]
38746#[target_feature(enable = "avx512f")]
38747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38748#[cfg_attr(test, assert_instr(vgetexpss))]
38749pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38750 unsafe {
38751 transmute(src:vgetexpss(
38752 a.as_f32x4(),
38753 b.as_f32x4(),
38754 src:f32x4::ZERO,
38755 mask:k,
38756 _MM_FROUND_NO_EXC,
38757 ))
38758 }
38759}
38760
38761/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38762///
38763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
38764#[inline]
38765#[target_feature(enable = "avx512f")]
38766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38767#[cfg_attr(test, assert_instr(vgetexpsd))]
38768pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
38769 unsafe {
38770 transmute(src:vgetexpsd(
38771 a.as_f64x2(),
38772 b.as_f64x2(),
38773 src:f64x2::ZERO,
38774 mask:0b1,
38775 _MM_FROUND_NO_EXC,
38776 ))
38777 }
38778}
38779
38780/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38781///
38782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
38783#[inline]
38784#[target_feature(enable = "avx512f")]
38785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38786#[cfg_attr(test, assert_instr(vgetexpsd))]
38787pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38788 unsafe {
38789 transmute(src:vgetexpsd(
38790 a.as_f64x2(),
38791 b.as_f64x2(),
38792 src.as_f64x2(),
38793 mask:k,
38794 _MM_FROUND_NO_EXC,
38795 ))
38796 }
38797}
38798
38799/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
38800///
38801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
38802#[inline]
38803#[target_feature(enable = "avx512f")]
38804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38805#[cfg_attr(test, assert_instr(vgetexpsd))]
38806pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38807 unsafe {
38808 transmute(src:vgetexpsd(
38809 a.as_f64x2(),
38810 b.as_f64x2(),
38811 src:f64x2::ZERO,
38812 mask:k,
38813 _MM_FROUND_NO_EXC,
38814 ))
38815 }
38816}
38817
38818/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38819/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38820/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38821/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38822/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38823/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38824/// The sign is determined by sc which can take the following values:\
38825/// _MM_MANT_SIGN_src // sign = sign(src)\
38826/// _MM_MANT_SIGN_zero // sign = 0\
38827/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38828/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38829///
38830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
38831#[inline]
38832#[target_feature(enable = "avx512f")]
38833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38834#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38835#[rustc_legacy_const_generics(2, 3)]
38836pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
38837 a: __m128,
38838 b: __m128,
38839) -> __m128 {
38840 unsafe {
38841 static_assert_uimm_bits!(NORM, 4);
38842 static_assert_uimm_bits!(SIGN, 2);
38843 let a: Simd = a.as_f32x4();
38844 let b: Simd = b.as_f32x4();
38845 let r: Simd = vgetmantss(
38846 a,
38847 b,
38848 SIGN << 2 | NORM,
38849 src:f32x4::ZERO,
38850 m:0b1,
38851 _MM_FROUND_CUR_DIRECTION,
38852 );
38853 transmute(src:r)
38854 }
38855}
38856
38857/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38859/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38860/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38861/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38862/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38863/// The sign is determined by sc which can take the following values:\
38864/// _MM_MANT_SIGN_src // sign = sign(src)\
38865/// _MM_MANT_SIGN_zero // sign = 0\
38866/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38867/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38868///
38869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
38870#[inline]
38871#[target_feature(enable = "avx512f")]
38872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38873#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38874#[rustc_legacy_const_generics(4, 5)]
38875pub fn _mm_mask_getmant_ss<
38876 const NORM: _MM_MANTISSA_NORM_ENUM,
38877 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38878>(
38879 src: __m128,
38880 k: __mmask8,
38881 a: __m128,
38882 b: __m128,
38883) -> __m128 {
38884 unsafe {
38885 static_assert_uimm_bits!(NORM, 4);
38886 static_assert_uimm_bits!(SIGN, 2);
38887 let a: Simd = a.as_f32x4();
38888 let b: Simd = b.as_f32x4();
38889 let src: Simd = src.as_f32x4();
38890 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
38891 transmute(src:r)
38892 }
38893}
38894
38895/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38896/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38897/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38898/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38899/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38900/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38901/// The sign is determined by sc which can take the following values:\
38902/// _MM_MANT_SIGN_src // sign = sign(src)\
38903/// _MM_MANT_SIGN_zero // sign = 0\
38904/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38905/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38906///
38907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
38908#[inline]
38909#[target_feature(enable = "avx512f")]
38910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38911#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
38912#[rustc_legacy_const_generics(3, 4)]
38913pub fn _mm_maskz_getmant_ss<
38914 const NORM: _MM_MANTISSA_NORM_ENUM,
38915 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38916>(
38917 k: __mmask8,
38918 a: __m128,
38919 b: __m128,
38920) -> __m128 {
38921 unsafe {
38922 static_assert_uimm_bits!(NORM, 4);
38923 static_assert_uimm_bits!(SIGN, 2);
38924 let a: Simd = a.as_f32x4();
38925 let b: Simd = b.as_f32x4();
38926 let r: Simd = vgetmantss(
38927 a,
38928 b,
38929 SIGN << 2 | NORM,
38930 src:f32x4::ZERO,
38931 m:k,
38932 _MM_FROUND_CUR_DIRECTION,
38933 );
38934 transmute(src:r)
38935 }
38936}
38937
38938/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38939/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38940/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38941/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38942/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38943/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38944/// The sign is determined by sc which can take the following values:\
38945/// _MM_MANT_SIGN_src // sign = sign(src)\
38946/// _MM_MANT_SIGN_zero // sign = 0\
38947/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38948/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38949///
38950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
38951#[inline]
38952#[target_feature(enable = "avx512f")]
38953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38954#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
38955#[rustc_legacy_const_generics(2, 3)]
38956pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
38957 a: __m128d,
38958 b: __m128d,
38959) -> __m128d {
38960 unsafe {
38961 static_assert_uimm_bits!(NORM, 4);
38962 static_assert_uimm_bits!(SIGN, 2);
38963 let a: Simd = a.as_f64x2();
38964 let b: Simd = b.as_f64x2();
38965 let r: Simd = vgetmantsd(
38966 a,
38967 b,
38968 SIGN << 2 | NORM,
38969 src:f64x2::ZERO,
38970 m:0b1,
38971 _MM_FROUND_CUR_DIRECTION,
38972 );
38973 transmute(src:r)
38974 }
38975}
38976
38977/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
38978/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
38979/// _MM_MANT_NORM_1_2 // interval [1, 2)\
38980/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
38981/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
38982/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
38983/// The sign is determined by sc which can take the following values:\
38984/// _MM_MANT_SIGN_src // sign = sign(src)\
38985/// _MM_MANT_SIGN_zero // sign = 0\
38986/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
38987/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38988///
38989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
38990#[inline]
38991#[target_feature(enable = "avx512f")]
38992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38993#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
38994#[rustc_legacy_const_generics(4, 5)]
38995pub fn _mm_mask_getmant_sd<
38996 const NORM: _MM_MANTISSA_NORM_ENUM,
38997 const SIGN: _MM_MANTISSA_SIGN_ENUM,
38998>(
38999 src: __m128d,
39000 k: __mmask8,
39001 a: __m128d,
39002 b: __m128d,
39003) -> __m128d {
39004 unsafe {
39005 static_assert_uimm_bits!(NORM, 4);
39006 static_assert_uimm_bits!(SIGN, 2);
39007 let a: Simd = a.as_f64x2();
39008 let b: Simd = b.as_f64x2();
39009 let src: Simd = src.as_f64x2();
39010 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
39011 transmute(src:r)
39012 }
39013}
39014
39015/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39016/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39017/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39018/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39019/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39020/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39021/// The sign is determined by sc which can take the following values:\
39022/// _MM_MANT_SIGN_src // sign = sign(src)\
39023/// _MM_MANT_SIGN_zero // sign = 0\
39024/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39026///
39027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
39028#[inline]
39029#[target_feature(enable = "avx512f")]
39030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39031#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
39032#[rustc_legacy_const_generics(3, 4)]
39033pub fn _mm_maskz_getmant_sd<
39034 const NORM: _MM_MANTISSA_NORM_ENUM,
39035 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39036>(
39037 k: __mmask8,
39038 a: __m128d,
39039 b: __m128d,
39040) -> __m128d {
39041 unsafe {
39042 static_assert_uimm_bits!(NORM, 4);
39043 static_assert_uimm_bits!(SIGN, 2);
39044 let a: Simd = a.as_f64x2();
39045 let b: Simd = b.as_f64x2();
39046 let r: Simd = vgetmantsd(
39047 a,
39048 b,
39049 SIGN << 2 | NORM,
39050 src:f64x2::ZERO,
39051 m:k,
39052 _MM_FROUND_CUR_DIRECTION,
39053 );
39054 transmute(src:r)
39055 }
39056}
39057
39058/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39059/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39060/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39061/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39062/// * [`_MM_FROUND_TO_POS_INF`] : round up
39063/// * [`_MM_FROUND_TO_ZERO`] : truncate
39064/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39065///
39066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
39067#[inline]
39068#[target_feature(enable = "avx512f")]
39069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39070#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
39071#[rustc_legacy_const_generics(2)]
39072pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
39073 unsafe {
39074 static_assert_uimm_bits!(IMM8, 8);
39075 let a: Simd = a.as_f32x4();
39076 let b: Simd = b.as_f32x4();
39077 let r: Simd = vrndscaless(
39078 a,
39079 b,
39080 src:f32x4::ZERO,
39081 mask:0b11111111,
39082 IMM8,
39083 _MM_FROUND_CUR_DIRECTION,
39084 );
39085 transmute(src:r)
39086 }
39087}
39088
39089/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39090/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39091/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39092/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39093/// * [`_MM_FROUND_TO_POS_INF`] : round up
39094/// * [`_MM_FROUND_TO_ZERO`] : truncate
39095/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39096///
39097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
39098#[inline]
39099#[target_feature(enable = "avx512f")]
39100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39101#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
39102#[rustc_legacy_const_generics(4)]
39103pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
39104 src: __m128,
39105 k: __mmask8,
39106 a: __m128,
39107 b: __m128,
39108) -> __m128 {
39109 unsafe {
39110 static_assert_uimm_bits!(IMM8, 8);
39111 let a: Simd = a.as_f32x4();
39112 let b: Simd = b.as_f32x4();
39113 let src: Simd = src.as_f32x4();
39114 let r: Simd = vrndscaless(a, b, src, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39115 transmute(src:r)
39116 }
39117}
39118
39119/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39120/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39121/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39122/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39123/// * [`_MM_FROUND_TO_POS_INF`] : round up
39124/// * [`_MM_FROUND_TO_ZERO`] : truncate
39125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39126///
39127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
39128#[inline]
39129#[target_feature(enable = "avx512f")]
39130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39131#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
39132#[rustc_legacy_const_generics(3)]
39133pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39134 unsafe {
39135 static_assert_uimm_bits!(IMM8, 8);
39136 let a: Simd = a.as_f32x4();
39137 let b: Simd = b.as_f32x4();
39138 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39139 transmute(src:r)
39140 }
39141}
39142
39143/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39144/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39145/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39146/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39147/// * [`_MM_FROUND_TO_POS_INF`] : round up
39148/// * [`_MM_FROUND_TO_ZERO`] : truncate
39149/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39150///
39151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
39152#[inline]
39153#[target_feature(enable = "avx512f")]
39154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39155#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
39156#[rustc_legacy_const_generics(2)]
39157pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
39158 unsafe {
39159 static_assert_uimm_bits!(IMM8, 8);
39160 let a: Simd = a.as_f64x2();
39161 let b: Simd = b.as_f64x2();
39162 let r: Simd = vrndscalesd(
39163 a,
39164 b,
39165 src:f64x2::ZERO,
39166 mask:0b11111111,
39167 IMM8,
39168 _MM_FROUND_CUR_DIRECTION,
39169 );
39170 transmute(src:r)
39171 }
39172}
39173
39174/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39175/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39176/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39177/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39178/// * [`_MM_FROUND_TO_POS_INF`] : round up
39179/// * [`_MM_FROUND_TO_ZERO`] : truncate
39180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39181///
39182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
39183#[inline]
39184#[target_feature(enable = "avx512f")]
39185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39186#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
39187#[rustc_legacy_const_generics(4)]
39188pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
39189 src: __m128d,
39190 k: __mmask8,
39191 a: __m128d,
39192 b: __m128d,
39193) -> __m128d {
39194 unsafe {
39195 static_assert_uimm_bits!(IMM8, 8);
39196 let a: Simd = a.as_f64x2();
39197 let b: Simd = b.as_f64x2();
39198 let src: Simd = src.as_f64x2();
39199 let r: Simd = vrndscalesd(a, b, src, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39200 transmute(src:r)
39201 }
39202}
39203
39204/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39205/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39206/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39207/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39208/// * [`_MM_FROUND_TO_POS_INF`] : round up
39209/// * [`_MM_FROUND_TO_ZERO`] : truncate
39210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39211///
39212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
39213#[inline]
39214#[target_feature(enable = "avx512f")]
39215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39216#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
39217#[rustc_legacy_const_generics(3)]
39218pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39219 unsafe {
39220 static_assert_uimm_bits!(IMM8, 8);
39221 let a: Simd = a.as_f64x2();
39222 let b: Simd = b.as_f64x2();
39223 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:k, IMM8, _MM_FROUND_CUR_DIRECTION);
39224 transmute(src:r)
39225 }
39226}
39227
39228/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39229///
39230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
39231#[inline]
39232#[target_feature(enable = "avx512f")]
39233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39234#[cfg_attr(test, assert_instr(vscalefss))]
39235pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
39236 unsafe {
39237 let a: Simd = a.as_f32x4();
39238 let b: Simd = b.as_f32x4();
39239 transmute(src:vscalefss(
39240 a,
39241 b,
39242 src:f32x4::ZERO,
39243 mask:0b11111111,
39244 _MM_FROUND_CUR_DIRECTION,
39245 ))
39246 }
39247}
39248
39249/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39250///
39251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
39252#[inline]
39253#[target_feature(enable = "avx512f")]
39254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39255#[cfg_attr(test, assert_instr(vscalefss))]
39256pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
39257 unsafe {
39258 let a: Simd = a.as_f32x4();
39259 let b: Simd = b.as_f32x4();
39260 let src: Simd = src.as_f32x4();
39261 transmute(src:vscalefss(a, b, src, mask:k, _MM_FROUND_CUR_DIRECTION))
39262 }
39263}
39264
39265/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39266///
39267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
39268#[inline]
39269#[target_feature(enable = "avx512f")]
39270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39271#[cfg_attr(test, assert_instr(vscalefss))]
39272pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39273 unsafe {
39274 transmute(src:vscalefss(
39275 a.as_f32x4(),
39276 b.as_f32x4(),
39277 src:f32x4::ZERO,
39278 mask:k,
39279 _MM_FROUND_CUR_DIRECTION,
39280 ))
39281 }
39282}
39283
39284/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39285///
39286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
39287#[inline]
39288#[target_feature(enable = "avx512f")]
39289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39290#[cfg_attr(test, assert_instr(vscalefsd))]
39291pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
39292 unsafe {
39293 transmute(src:vscalefsd(
39294 a.as_f64x2(),
39295 b.as_f64x2(),
39296 src:f64x2::ZERO,
39297 mask:0b11111111,
39298 _MM_FROUND_CUR_DIRECTION,
39299 ))
39300 }
39301}
39302
39303/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39304///
39305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
39306#[inline]
39307#[target_feature(enable = "avx512f")]
39308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39309#[cfg_attr(test, assert_instr(vscalefsd))]
39310pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39311 unsafe {
39312 transmute(src:vscalefsd(
39313 a.as_f64x2(),
39314 b.as_f64x2(),
39315 src.as_f64x2(),
39316 mask:k,
39317 _MM_FROUND_CUR_DIRECTION,
39318 ))
39319 }
39320}
39321
39322/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39323///
39324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
39325#[inline]
39326#[target_feature(enable = "avx512f")]
39327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39328#[cfg_attr(test, assert_instr(vscalefsd))]
39329pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39330 unsafe {
39331 transmute(src:vscalefsd(
39332 a.as_f64x2(),
39333 b.as_f64x2(),
39334 src:f64x2::ZERO,
39335 mask:k,
39336 _MM_FROUND_CUR_DIRECTION,
39337 ))
39338 }
39339}
39340
39341/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39342///
39343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
39344#[inline]
39345#[target_feature(enable = "avx512f")]
39346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39347#[cfg_attr(test, assert_instr(vfmadd))]
39348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39349pub const fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39350 unsafe {
39351 let mut fmadd: f32 = simd_extract!(a, 0);
39352 if (k & 0b00000001) != 0 {
39353 let extractb: f32 = simd_extract!(b, 0);
39354 let extractc: f32 = simd_extract!(c, 0);
39355 fmadd = fmaf32(a:fmadd, b:extractb, c:extractc);
39356 }
39357 simd_insert!(a, 0, fmadd)
39358 }
39359}
39360
39361/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39362///
39363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
39364#[inline]
39365#[target_feature(enable = "avx512f")]
39366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39367#[cfg_attr(test, assert_instr(vfmadd))]
39368#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39369pub const fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39370 unsafe {
39371 let mut fmadd: f32 = 0.;
39372 if (k & 0b00000001) != 0 {
39373 let extracta: f32 = simd_extract!(a, 0);
39374 let extractb: f32 = simd_extract!(b, 0);
39375 let extractc: f32 = simd_extract!(c, 0);
39376 fmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39377 }
39378 simd_insert!(a, 0, fmadd)
39379 }
39380}
39381
39382/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39383///
39384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
39385#[inline]
39386#[target_feature(enable = "avx512f")]
39387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39388#[cfg_attr(test, assert_instr(vfmadd))]
39389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39390pub const fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39391 unsafe {
39392 let mut fmadd: f32 = simd_extract!(c, 0);
39393 if (k & 0b00000001) != 0 {
39394 let extracta: f32 = simd_extract!(a, 0);
39395 let extractb: f32 = simd_extract!(b, 0);
39396 fmadd = fmaf32(a:extracta, b:extractb, c:fmadd);
39397 }
39398 simd_insert!(c, 0, fmadd)
39399 }
39400}
39401
39402/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39403///
39404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
39405#[inline]
39406#[target_feature(enable = "avx512f")]
39407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39408#[cfg_attr(test, assert_instr(vfmadd))]
39409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39410pub const fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39411 unsafe {
39412 let mut fmadd: f64 = simd_extract!(a, 0);
39413 if (k & 0b00000001) != 0 {
39414 let extractb: f64 = simd_extract!(b, 0);
39415 let extractc: f64 = simd_extract!(c, 0);
39416 fmadd = fmaf64(a:fmadd, b:extractb, c:extractc);
39417 }
39418 simd_insert!(a, 0, fmadd)
39419 }
39420}
39421
39422/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39423///
39424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
39425#[inline]
39426#[target_feature(enable = "avx512f")]
39427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39428#[cfg_attr(test, assert_instr(vfmadd))]
39429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39430pub const fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39431 unsafe {
39432 let mut fmadd: f64 = 0.;
39433 if (k & 0b00000001) != 0 {
39434 let extracta: f64 = simd_extract!(a, 0);
39435 let extractb: f64 = simd_extract!(b, 0);
39436 let extractc: f64 = simd_extract!(c, 0);
39437 fmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39438 }
39439 simd_insert!(a, 0, fmadd)
39440 }
39441}
39442
39443/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39444///
39445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
39446#[inline]
39447#[target_feature(enable = "avx512f")]
39448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39449#[cfg_attr(test, assert_instr(vfmadd))]
39450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39451pub const fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39452 unsafe {
39453 let mut fmadd: f64 = simd_extract!(c, 0);
39454 if (k & 0b00000001) != 0 {
39455 let extracta: f64 = simd_extract!(a, 0);
39456 let extractb: f64 = simd_extract!(b, 0);
39457 fmadd = fmaf64(a:extracta, b:extractb, c:fmadd);
39458 }
39459 simd_insert!(c, 0, fmadd)
39460 }
39461}
39462
39463/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39464///
39465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
39466#[inline]
39467#[target_feature(enable = "avx512f")]
39468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39469#[cfg_attr(test, assert_instr(vfmsub))]
39470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39471pub const fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39472 unsafe {
39473 let mut fmsub: f32 = simd_extract!(a, 0);
39474 if (k & 0b00000001) != 0 {
39475 let extractb: f32 = simd_extract!(b, 0);
39476 let extractc: f32 = simd_extract!(c, 0);
39477 let extractc: f32 = -extractc;
39478 fmsub = fmaf32(a:fmsub, b:extractb, c:extractc);
39479 }
39480 simd_insert!(a, 0, fmsub)
39481 }
39482}
39483
39484/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39485///
39486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
39487#[inline]
39488#[target_feature(enable = "avx512f")]
39489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39490#[cfg_attr(test, assert_instr(vfmsub))]
39491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39492pub const fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39493 unsafe {
39494 let mut fmsub: f32 = 0.;
39495 if (k & 0b00000001) != 0 {
39496 let extracta: f32 = simd_extract!(a, 0);
39497 let extractb: f32 = simd_extract!(b, 0);
39498 let extractc: f32 = simd_extract!(c, 0);
39499 let extractc: f32 = -extractc;
39500 fmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39501 }
39502 simd_insert!(a, 0, fmsub)
39503 }
39504}
39505
39506/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39507///
39508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
39509#[inline]
39510#[target_feature(enable = "avx512f")]
39511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39512#[cfg_attr(test, assert_instr(vfmsub))]
39513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39514pub const fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39515 unsafe {
39516 let mut fmsub: f32 = simd_extract!(c, 0);
39517 if (k & 0b00000001) != 0 {
39518 let extracta: f32 = simd_extract!(a, 0);
39519 let extractb: f32 = simd_extract!(b, 0);
39520 let extractc: f32 = -fmsub;
39521 fmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39522 }
39523 simd_insert!(c, 0, fmsub)
39524 }
39525}
39526
39527/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39528///
39529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
39530#[inline]
39531#[target_feature(enable = "avx512f")]
39532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39533#[cfg_attr(test, assert_instr(vfmsub))]
39534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39535pub const fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39536 unsafe {
39537 let mut fmsub: f64 = simd_extract!(a, 0);
39538 if (k & 0b00000001) != 0 {
39539 let extractb: f64 = simd_extract!(b, 0);
39540 let extractc: f64 = simd_extract!(c, 0);
39541 let extractc: f64 = -extractc;
39542 fmsub = fmaf64(a:fmsub, b:extractb, c:extractc);
39543 }
39544 simd_insert!(a, 0, fmsub)
39545 }
39546}
39547
39548/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39549///
39550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
39551#[inline]
39552#[target_feature(enable = "avx512f")]
39553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39554#[cfg_attr(test, assert_instr(vfmsub))]
39555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39556pub const fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39557 unsafe {
39558 let mut fmsub: f64 = 0.;
39559 if (k & 0b00000001) != 0 {
39560 let extracta: f64 = simd_extract!(a, 0);
39561 let extractb: f64 = simd_extract!(b, 0);
39562 let extractc: f64 = simd_extract!(c, 0);
39563 let extractc: f64 = -extractc;
39564 fmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39565 }
39566 simd_insert!(a, 0, fmsub)
39567 }
39568}
39569
39570/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39571///
39572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
39573#[inline]
39574#[target_feature(enable = "avx512f")]
39575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39576#[cfg_attr(test, assert_instr(vfmsub))]
39577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39578pub const fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39579 unsafe {
39580 let mut fmsub: f64 = simd_extract!(c, 0);
39581 if (k & 0b00000001) != 0 {
39582 let extracta: f64 = simd_extract!(a, 0);
39583 let extractb: f64 = simd_extract!(b, 0);
39584 let extractc: f64 = -fmsub;
39585 fmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39586 }
39587 simd_insert!(c, 0, fmsub)
39588 }
39589}
39590
39591/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39592///
39593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
39594#[inline]
39595#[target_feature(enable = "avx512f")]
39596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39597#[cfg_attr(test, assert_instr(vfnmadd))]
39598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39599pub const fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39600 unsafe {
39601 let mut fnmadd: f32 = simd_extract!(a, 0);
39602 if (k & 0b00000001) != 0 {
39603 let extracta: f32 = -fnmadd;
39604 let extractb: f32 = simd_extract!(b, 0);
39605 let extractc: f32 = simd_extract!(c, 0);
39606 fnmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39607 }
39608 simd_insert!(a, 0, fnmadd)
39609 }
39610}
39611
39612/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39613///
39614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
39615#[inline]
39616#[target_feature(enable = "avx512f")]
39617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39618#[cfg_attr(test, assert_instr(vfnmadd))]
39619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39620pub const fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39621 unsafe {
39622 let mut fnmadd: f32 = 0.;
39623 if (k & 0b00000001) != 0 {
39624 let extracta: f32 = simd_extract!(a, 0);
39625 let extracta: f32 = -extracta;
39626 let extractb: f32 = simd_extract!(b, 0);
39627 let extractc: f32 = simd_extract!(c, 0);
39628 fnmadd = fmaf32(a:extracta, b:extractb, c:extractc);
39629 }
39630 simd_insert!(a, 0, fnmadd)
39631 }
39632}
39633
39634/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39635///
39636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
39637#[inline]
39638#[target_feature(enable = "avx512f")]
39639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39640#[cfg_attr(test, assert_instr(vfnmadd))]
39641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39642pub const fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39643 unsafe {
39644 let mut fnmadd: f32 = simd_extract!(c, 0);
39645 if (k & 0b00000001) != 0 {
39646 let extracta: f32 = simd_extract!(a, 0);
39647 let extracta: f32 = -extracta;
39648 let extractb: f32 = simd_extract!(b, 0);
39649 fnmadd = fmaf32(a:extracta, b:extractb, c:fnmadd);
39650 }
39651 simd_insert!(c, 0, fnmadd)
39652 }
39653}
39654
39655/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vfnmadd))]
39662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39663pub const fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39664 unsafe {
39665 let mut fnmadd: f64 = simd_extract!(a, 0);
39666 if (k & 0b00000001) != 0 {
39667 let extracta: f64 = -fnmadd;
39668 let extractb: f64 = simd_extract!(b, 0);
39669 let extractc: f64 = simd_extract!(c, 0);
39670 fnmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39671 }
39672 simd_insert!(a, 0, fnmadd)
39673 }
39674}
39675
39676/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39677///
39678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
39679#[inline]
39680#[target_feature(enable = "avx512f")]
39681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39682#[cfg_attr(test, assert_instr(vfnmadd))]
39683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39684pub const fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39685 unsafe {
39686 let mut fnmadd: f64 = 0.;
39687 if (k & 0b00000001) != 0 {
39688 let extracta: f64 = simd_extract!(a, 0);
39689 let extracta: f64 = -extracta;
39690 let extractb: f64 = simd_extract!(b, 0);
39691 let extractc: f64 = simd_extract!(c, 0);
39692 fnmadd = fmaf64(a:extracta, b:extractb, c:extractc);
39693 }
39694 simd_insert!(a, 0, fnmadd)
39695 }
39696}
39697
39698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39699///
39700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
39701#[inline]
39702#[target_feature(enable = "avx512f")]
39703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39704#[cfg_attr(test, assert_instr(vfnmadd))]
39705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39706pub const fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39707 unsafe {
39708 let mut fnmadd: f64 = simd_extract!(c, 0);
39709 if (k & 0b00000001) != 0 {
39710 let extracta: f64 = simd_extract!(a, 0);
39711 let extracta: f64 = -extracta;
39712 let extractb: f64 = simd_extract!(b, 0);
39713 fnmadd = fmaf64(a:extracta, b:extractb, c:fnmadd);
39714 }
39715 simd_insert!(c, 0, fnmadd)
39716 }
39717}
39718
39719/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39720///
39721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
39722#[inline]
39723#[target_feature(enable = "avx512f")]
39724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39725#[cfg_attr(test, assert_instr(vfnmsub))]
39726#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39727pub const fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
39728 unsafe {
39729 let mut fnmsub: f32 = simd_extract!(a, 0);
39730 if (k & 0b00000001) != 0 {
39731 let extracta: f32 = -fnmsub;
39732 let extractb: f32 = simd_extract!(b, 0);
39733 let extractc: f32 = simd_extract!(c, 0);
39734 let extractc: f32 = -extractc;
39735 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39736 }
39737 simd_insert!(a, 0, fnmsub)
39738 }
39739}
39740
39741/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39742///
39743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
39744#[inline]
39745#[target_feature(enable = "avx512f")]
39746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39747#[cfg_attr(test, assert_instr(vfnmsub))]
39748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39749pub const fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
39750 unsafe {
39751 let mut fnmsub: f32 = 0.;
39752 if (k & 0b00000001) != 0 {
39753 let extracta: f32 = simd_extract!(a, 0);
39754 let extracta: f32 = -extracta;
39755 let extractb: f32 = simd_extract!(b, 0);
39756 let extractc: f32 = simd_extract!(c, 0);
39757 let extractc: f32 = -extractc;
39758 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39759 }
39760 simd_insert!(a, 0, fnmsub)
39761 }
39762}
39763
39764/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
39765///
39766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
39767#[inline]
39768#[target_feature(enable = "avx512f")]
39769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39770#[cfg_attr(test, assert_instr(vfnmsub))]
39771#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39772pub const fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
39773 unsafe {
39774 let mut fnmsub: f32 = simd_extract!(c, 0);
39775 if (k & 0b00000001) != 0 {
39776 let extracta: f32 = simd_extract!(a, 0);
39777 let extracta: f32 = -extracta;
39778 let extractb: f32 = simd_extract!(b, 0);
39779 let extractc: f32 = -fnmsub;
39780 fnmsub = fmaf32(a:extracta, b:extractb, c:extractc);
39781 }
39782 simd_insert!(c, 0, fnmsub)
39783 }
39784}
39785
39786/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39787///
39788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
39789#[inline]
39790#[target_feature(enable = "avx512f")]
39791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39792#[cfg_attr(test, assert_instr(vfnmsub))]
39793#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39794pub const fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
39795 unsafe {
39796 let mut fnmsub: f64 = simd_extract!(a, 0);
39797 if (k & 0b00000001) != 0 {
39798 let extracta: f64 = -fnmsub;
39799 let extractb: f64 = simd_extract!(b, 0);
39800 let extractc: f64 = simd_extract!(c, 0);
39801 let extractc: f64 = -extractc;
39802 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39803 }
39804 simd_insert!(a, 0, fnmsub)
39805 }
39806}
39807
39808/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39809///
39810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
39811#[inline]
39812#[target_feature(enable = "avx512f")]
39813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39814#[cfg_attr(test, assert_instr(vfnmsub))]
39815#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39816pub const fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39817 unsafe {
39818 let mut fnmsub: f64 = 0.;
39819 if (k & 0b00000001) != 0 {
39820 let extracta: f64 = simd_extract!(a, 0);
39821 let extracta: f64 = -extracta;
39822 let extractb: f64 = simd_extract!(b, 0);
39823 let extractc: f64 = simd_extract!(c, 0);
39824 let extractc: f64 = -extractc;
39825 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39826 }
39827 simd_insert!(a, 0, fnmsub)
39828 }
39829}
39830
39831/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
39832///
39833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
39834#[inline]
39835#[target_feature(enable = "avx512f")]
39836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39837#[cfg_attr(test, assert_instr(vfnmsub))]
39838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
39839pub const fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
39840 unsafe {
39841 let mut fnmsub: f64 = simd_extract!(c, 0);
39842 if (k & 0b00000001) != 0 {
39843 let extracta: f64 = simd_extract!(a, 0);
39844 let extracta: f64 = -extracta;
39845 let extractb: f64 = simd_extract!(b, 0);
39846 let extractc: f64 = -fnmsub;
39847 fnmsub = fmaf64(a:extracta, b:extractb, c:extractc);
39848 }
39849 simd_insert!(c, 0, fnmsub)
39850 }
39851}
39852
39853/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39854///
39855/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39856/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39857/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39858/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39859/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39860/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39861///
39862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
39863#[inline]
39864#[target_feature(enable = "avx512f")]
39865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39866#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39867#[rustc_legacy_const_generics(2)]
39868pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39869 unsafe {
39870 static_assert_rounding!(ROUNDING);
39871 let a: Simd = a.as_f32x4();
39872 let b: Simd = b.as_f32x4();
39873 let r: Simd = vaddss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
39874 transmute(src:r)
39875 }
39876}
39877
39878/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39879///
39880/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39881/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39882/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39883/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39884/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39885/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39886///
39887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
39888#[inline]
39889#[target_feature(enable = "avx512f")]
39890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39891#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39892#[rustc_legacy_const_generics(4)]
39893pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
39894 src: __m128,
39895 k: __mmask8,
39896 a: __m128,
39897 b: __m128,
39898) -> __m128 {
39899 unsafe {
39900 static_assert_rounding!(ROUNDING);
39901 let a: Simd = a.as_f32x4();
39902 let b: Simd = b.as_f32x4();
39903 let src: Simd = src.as_f32x4();
39904 let r: Simd = vaddss(a, b, src, mask:k, ROUNDING);
39905 transmute(src:r)
39906 }
39907}
39908
39909/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39910///
39911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39917///
39918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
39919#[inline]
39920#[target_feature(enable = "avx512f")]
39921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39922#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
39923#[rustc_legacy_const_generics(3)]
39924pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39925 unsafe {
39926 static_assert_rounding!(ROUNDING);
39927 let a: Simd = a.as_f32x4();
39928 let b: Simd = b.as_f32x4();
39929 let r: Simd = vaddss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
39930 transmute(src:r)
39931 }
39932}
39933
39934/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39935///
39936/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39937/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39938/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39939/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39940/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39942///
39943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
39944#[inline]
39945#[target_feature(enable = "avx512f")]
39946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39947#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39948#[rustc_legacy_const_generics(2)]
39949pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39950 unsafe {
39951 static_assert_rounding!(ROUNDING);
39952 let a: Simd = a.as_f64x2();
39953 let b: Simd = b.as_f64x2();
39954 let r: Simd = vaddsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
39955 transmute(src:r)
39956 }
39957}
39958
39959/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39960///
39961/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39962/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39963/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39964/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39965/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39966/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39967///
39968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
39969#[inline]
39970#[target_feature(enable = "avx512f")]
39971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39972#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
39973#[rustc_legacy_const_generics(4)]
39974pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
39975 src: __m128d,
39976 k: __mmask8,
39977 a: __m128d,
39978 b: __m128d,
39979) -> __m128d {
39980 unsafe {
39981 static_assert_rounding!(ROUNDING);
39982 let a: Simd = a.as_f64x2();
39983 let b: Simd = b.as_f64x2();
39984 let src: Simd = src.as_f64x2();
39985 let r: Simd = vaddsd(a, b, src, mask:k, ROUNDING);
39986 transmute(src:r)
39987 }
39988}
39989
39990/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39991///
39992/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39993/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39994/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39995/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39996/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39997/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39998///
39999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
40000#[inline]
40001#[target_feature(enable = "avx512f")]
40002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40003#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
40004#[rustc_legacy_const_generics(3)]
40005pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40006 unsafe {
40007 static_assert_rounding!(ROUNDING);
40008 let a: Simd = a.as_f64x2();
40009 let b: Simd = b.as_f64x2();
40010 let r: Simd = vaddsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40011 transmute(src:r)
40012 }
40013}
40014
40015/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40016///
40017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40023///
40024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
40025#[inline]
40026#[target_feature(enable = "avx512f")]
40027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40028#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40029#[rustc_legacy_const_generics(2)]
40030pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40031 unsafe {
40032 static_assert_rounding!(ROUNDING);
40033 let a: Simd = a.as_f32x4();
40034 let b: Simd = b.as_f32x4();
40035 let r: Simd = vsubss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40036 transmute(src:r)
40037 }
40038}
40039
40040/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40041///
40042/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40043/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40044/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40045/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40046/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40047/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40048///
40049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
40050#[inline]
40051#[target_feature(enable = "avx512f")]
40052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40053#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40054#[rustc_legacy_const_generics(4)]
40055pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
40056 src: __m128,
40057 k: __mmask8,
40058 a: __m128,
40059 b: __m128,
40060) -> __m128 {
40061 unsafe {
40062 static_assert_rounding!(ROUNDING);
40063 let a: Simd = a.as_f32x4();
40064 let b: Simd = b.as_f32x4();
40065 let src: Simd = src.as_f32x4();
40066 let r: Simd = vsubss(a, b, src, mask:k, ROUNDING);
40067 transmute(src:r)
40068 }
40069}
40070
40071/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40072///
40073/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40074/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40075/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40076/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40077/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40078/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40079///
40080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
40081#[inline]
40082#[target_feature(enable = "avx512f")]
40083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40084#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
40085#[rustc_legacy_const_generics(3)]
40086pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40087 unsafe {
40088 static_assert_rounding!(ROUNDING);
40089 let a: Simd = a.as_f32x4();
40090 let b: Simd = b.as_f32x4();
40091 let r: Simd = vsubss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40092 transmute(src:r)
40093 }
40094}
40095
40096/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40097///
40098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40104///
40105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
40106#[inline]
40107#[target_feature(enable = "avx512f")]
40108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40109#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40110#[rustc_legacy_const_generics(2)]
40111pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40112 unsafe {
40113 static_assert_rounding!(ROUNDING);
40114 let a: Simd = a.as_f64x2();
40115 let b: Simd = b.as_f64x2();
40116 let r: Simd = vsubsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40117 transmute(src:r)
40118 }
40119}
40120
40121/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40122///
40123/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40124/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40125/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40126/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40127/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40128/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40129///
40130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
40131#[inline]
40132#[target_feature(enable = "avx512f")]
40133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40134#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40135#[rustc_legacy_const_generics(4)]
40136pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
40137 src: __m128d,
40138 k: __mmask8,
40139 a: __m128d,
40140 b: __m128d,
40141) -> __m128d {
40142 unsafe {
40143 static_assert_rounding!(ROUNDING);
40144 let a: Simd = a.as_f64x2();
40145 let b: Simd = b.as_f64x2();
40146 let src: Simd = src.as_f64x2();
40147 let r: Simd = vsubsd(a, b, src, mask:k, ROUNDING);
40148 transmute(src:r)
40149 }
40150}
40151
40152/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40153///
40154/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40155/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40156/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40157/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40158/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40160///
40161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
40162#[inline]
40163#[target_feature(enable = "avx512f")]
40164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40165#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
40166#[rustc_legacy_const_generics(3)]
40167pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40168 unsafe {
40169 static_assert_rounding!(ROUNDING);
40170 let a: Simd = a.as_f64x2();
40171 let b: Simd = b.as_f64x2();
40172 let r: Simd = vsubsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40173 transmute(src:r)
40174 }
40175}
40176
40177/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40178///
40179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40185///
40186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
40187#[inline]
40188#[target_feature(enable = "avx512f")]
40189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40190#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40191#[rustc_legacy_const_generics(2)]
40192pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40193 unsafe {
40194 static_assert_rounding!(ROUNDING);
40195 let a: Simd = a.as_f32x4();
40196 let b: Simd = b.as_f32x4();
40197 let r: Simd = vmulss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40198 transmute(src:r)
40199 }
40200}
40201
40202/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40203///
40204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40210///
40211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
40212#[inline]
40213#[target_feature(enable = "avx512f")]
40214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40215#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40216#[rustc_legacy_const_generics(4)]
40217pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
40218 src: __m128,
40219 k: __mmask8,
40220 a: __m128,
40221 b: __m128,
40222) -> __m128 {
40223 unsafe {
40224 static_assert_rounding!(ROUNDING);
40225 let a: Simd = a.as_f32x4();
40226 let b: Simd = b.as_f32x4();
40227 let src: Simd = src.as_f32x4();
40228 let r: Simd = vmulss(a, b, src, mask:k, ROUNDING);
40229 transmute(src:r)
40230 }
40231}
40232
40233/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40234///
40235/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40236/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40237/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40238/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40239/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40241///
40242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
40243#[inline]
40244#[target_feature(enable = "avx512f")]
40245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40246#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
40247#[rustc_legacy_const_generics(3)]
40248pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40249 unsafe {
40250 static_assert_rounding!(ROUNDING);
40251 let a: Simd = a.as_f32x4();
40252 let b: Simd = b.as_f32x4();
40253 let r: Simd = vmulss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40254 transmute(src:r)
40255 }
40256}
40257
40258/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40259///
40260/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40261/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40262/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40263/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40264/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40265/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40266///
40267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
40268#[inline]
40269#[target_feature(enable = "avx512f")]
40270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40271#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40272#[rustc_legacy_const_generics(2)]
40273pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40274 unsafe {
40275 static_assert_rounding!(ROUNDING);
40276 let a: Simd = a.as_f64x2();
40277 let b: Simd = b.as_f64x2();
40278 let r: Simd = vmulsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40279 transmute(src:r)
40280 }
40281}
40282
40283/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40284///
40285/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40286/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40287/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40288/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40289/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40291///
40292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
40293#[inline]
40294#[target_feature(enable = "avx512f")]
40295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40296#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40297#[rustc_legacy_const_generics(4)]
40298pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
40299 src: __m128d,
40300 k: __mmask8,
40301 a: __m128d,
40302 b: __m128d,
40303) -> __m128d {
40304 unsafe {
40305 static_assert_rounding!(ROUNDING);
40306 let a: Simd = a.as_f64x2();
40307 let b: Simd = b.as_f64x2();
40308 let src: Simd = src.as_f64x2();
40309 let r: Simd = vmulsd(a, b, src, mask:k, ROUNDING);
40310 transmute(src:r)
40311 }
40312}
40313
40314/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40315///
40316/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40317/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40318/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40319/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40320/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40321/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40322///
40323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
40324#[inline]
40325#[target_feature(enable = "avx512f")]
40326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40327#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
40328#[rustc_legacy_const_generics(3)]
40329pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40330 unsafe {
40331 static_assert_rounding!(ROUNDING);
40332 let a: Simd = a.as_f64x2();
40333 let b: Simd = b.as_f64x2();
40334 let r: Simd = vmulsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40335 transmute(src:r)
40336 }
40337}
40338
40339/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40340///
40341/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40342/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40343/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40344/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40345/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40346/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40347///
40348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
40349#[inline]
40350#[target_feature(enable = "avx512f")]
40351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40352#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40353#[rustc_legacy_const_generics(2)]
40354pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40355 unsafe {
40356 static_assert_rounding!(ROUNDING);
40357 let a: Simd = a.as_f32x4();
40358 let b: Simd = b.as_f32x4();
40359 let r: Simd = vdivss(a, b, src:f32x4::ZERO, mask:0b1, ROUNDING);
40360 transmute(src:r)
40361 }
40362}
40363
40364/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40365///
40366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40372///
40373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
40374#[inline]
40375#[target_feature(enable = "avx512f")]
40376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40377#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40378#[rustc_legacy_const_generics(4)]
40379pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
40380 src: __m128,
40381 k: __mmask8,
40382 a: __m128,
40383 b: __m128,
40384) -> __m128 {
40385 unsafe {
40386 static_assert_rounding!(ROUNDING);
40387 let a: Simd = a.as_f32x4();
40388 let b: Simd = b.as_f32x4();
40389 let src: Simd = src.as_f32x4();
40390 let r: Simd = vdivss(a, b, src, mask:k, ROUNDING);
40391 transmute(src:r)
40392 }
40393}
40394
40395/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40396///
40397/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40398/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40399/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40400/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40401/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40403///
40404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
40405#[inline]
40406#[target_feature(enable = "avx512f")]
40407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40408#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40409#[rustc_legacy_const_generics(3)]
40410pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40411 unsafe {
40412 static_assert_rounding!(ROUNDING);
40413 let a: Simd = a.as_f32x4();
40414 let b: Simd = b.as_f32x4();
40415 let r: Simd = vdivss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
40416 transmute(src:r)
40417 }
40418}
40419
40420/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40421///
40422/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40423/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40424/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40425/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40426/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40427/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40428///
40429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
40430#[inline]
40431#[target_feature(enable = "avx512f")]
40432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40433#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40434#[rustc_legacy_const_generics(2)]
40435pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40436 unsafe {
40437 static_assert_rounding!(ROUNDING);
40438 let a: Simd = a.as_f64x2();
40439 let b: Simd = b.as_f64x2();
40440 let r: Simd = vdivsd(a, b, src:f64x2::ZERO, mask:0b1, ROUNDING);
40441 transmute(src:r)
40442 }
40443}
40444
40445/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40446///
40447/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40453///
40454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
40455#[inline]
40456#[target_feature(enable = "avx512f")]
40457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40458#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40459#[rustc_legacy_const_generics(4)]
40460pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
40461 src: __m128d,
40462 k: __mmask8,
40463 a: __m128d,
40464 b: __m128d,
40465) -> __m128d {
40466 unsafe {
40467 static_assert_rounding!(ROUNDING);
40468 let a: Simd = a.as_f64x2();
40469 let b: Simd = b.as_f64x2();
40470 let src: Simd = src.as_f64x2();
40471 let r: Simd = vdivsd(a, b, src, mask:k, ROUNDING);
40472 transmute(src:r)
40473 }
40474}
40475
40476/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40477///
40478/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40484///
40485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
40486#[inline]
40487#[target_feature(enable = "avx512f")]
40488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40489#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40490#[rustc_legacy_const_generics(3)]
40491pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40492 unsafe {
40493 static_assert_rounding!(ROUNDING);
40494 let a: Simd = a.as_f64x2();
40495 let b: Simd = b.as_f64x2();
40496 let r: Simd = vdivsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
40497 transmute(src:r)
40498 }
40499}
40500
40501/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40502/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40503///
40504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
40505#[inline]
40506#[target_feature(enable = "avx512f")]
40507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40508#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40509#[rustc_legacy_const_generics(2)]
40510pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40511 unsafe {
40512 static_assert_sae!(SAE);
40513 let a: Simd = a.as_f32x4();
40514 let b: Simd = b.as_f32x4();
40515 let r: Simd = vmaxss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40516 transmute(src:r)
40517 }
40518}
40519
40520/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40521/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40522///
40523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
40524#[inline]
40525#[target_feature(enable = "avx512f")]
40526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40527#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40528#[rustc_legacy_const_generics(4)]
40529pub fn _mm_mask_max_round_ss<const SAE: i32>(
40530 src: __m128,
40531 k: __mmask8,
40532 a: __m128,
40533 b: __m128,
40534) -> __m128 {
40535 unsafe {
40536 static_assert_sae!(SAE);
40537 let a: Simd = a.as_f32x4();
40538 let b: Simd = b.as_f32x4();
40539 let src: Simd = src.as_f32x4();
40540 let r: Simd = vmaxss(a, b, src, mask:k, SAE);
40541 transmute(src:r)
40542 }
40543}
40544
40545/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40546/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40547///
40548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
40549#[inline]
40550#[target_feature(enable = "avx512f")]
40551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40552#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40553#[rustc_legacy_const_generics(3)]
40554pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40555 unsafe {
40556 static_assert_sae!(SAE);
40557 let a: Simd = a.as_f32x4();
40558 let b: Simd = b.as_f32x4();
40559 let r: Simd = vmaxss(a, b, src:f32x4::ZERO, mask:k, SAE);
40560 transmute(src:r)
40561 }
40562}
40563
40564/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40566///
40567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
40568#[inline]
40569#[target_feature(enable = "avx512f")]
40570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40571#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40572#[rustc_legacy_const_generics(2)]
40573pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40574 unsafe {
40575 static_assert_sae!(SAE);
40576 let a: Simd = a.as_f64x2();
40577 let b: Simd = b.as_f64x2();
40578 let r: Simd = vmaxsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40579 transmute(src:r)
40580 }
40581}
40582
40583/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40584/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40585///
40586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
40587#[inline]
40588#[target_feature(enable = "avx512f")]
40589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40590#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40591#[rustc_legacy_const_generics(4)]
40592pub fn _mm_mask_max_round_sd<const SAE: i32>(
40593 src: __m128d,
40594 k: __mmask8,
40595 a: __m128d,
40596 b: __m128d,
40597) -> __m128d {
40598 unsafe {
40599 static_assert_sae!(SAE);
40600 let a: Simd = a.as_f64x2();
40601 let b: Simd = b.as_f64x2();
40602 let src: Simd = src.as_f64x2();
40603 let r: Simd = vmaxsd(a, b, src, mask:k, SAE);
40604 transmute(src:r)
40605 }
40606}
40607
40608/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40609/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40610///
40611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
40612#[inline]
40613#[target_feature(enable = "avx512f")]
40614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40615#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40616#[rustc_legacy_const_generics(3)]
40617pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40618 unsafe {
40619 static_assert_sae!(SAE);
40620 let a: Simd = a.as_f64x2();
40621 let b: Simd = b.as_f64x2();
40622 let r: Simd = vmaxsd(a, b, src:f64x2::ZERO, mask:k, SAE);
40623 transmute(src:r)
40624 }
40625}
40626
40627/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40628/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40629///
40630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
40631#[inline]
40632#[target_feature(enable = "avx512f")]
40633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40634#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40635#[rustc_legacy_const_generics(2)]
40636pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40637 unsafe {
40638 static_assert_sae!(SAE);
40639 let a: Simd = a.as_f32x4();
40640 let b: Simd = b.as_f32x4();
40641 let r: Simd = vminss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40642 transmute(src:r)
40643 }
40644}
40645
40646/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40647/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40648///
40649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
40650#[inline]
40651#[target_feature(enable = "avx512f")]
40652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40653#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40654#[rustc_legacy_const_generics(4)]
40655pub fn _mm_mask_min_round_ss<const SAE: i32>(
40656 src: __m128,
40657 k: __mmask8,
40658 a: __m128,
40659 b: __m128,
40660) -> __m128 {
40661 unsafe {
40662 static_assert_sae!(SAE);
40663 let a: Simd = a.as_f32x4();
40664 let b: Simd = b.as_f32x4();
40665 let src: Simd = src.as_f32x4();
40666 let r: Simd = vminss(a, b, src, mask:k, SAE);
40667 transmute(src:r)
40668 }
40669}
40670
40671/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40673///
40674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
40675#[inline]
40676#[target_feature(enable = "avx512f")]
40677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40678#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40679#[rustc_legacy_const_generics(3)]
40680pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40681 unsafe {
40682 static_assert_sae!(SAE);
40683 let a: Simd = a.as_f32x4();
40684 let b: Simd = b.as_f32x4();
40685 let r: Simd = vminss(a, b, src:f32x4::ZERO, mask:k, SAE);
40686 transmute(src:r)
40687 }
40688}
40689
40690/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst , and copy the upper element from a to the upper element of dst.\
40691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40692///
40693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
40694#[inline]
40695#[target_feature(enable = "avx512f")]
40696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40697#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40698#[rustc_legacy_const_generics(2)]
40699pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40700 unsafe {
40701 static_assert_sae!(SAE);
40702 let a: Simd = a.as_f64x2();
40703 let b: Simd = b.as_f64x2();
40704 let r: Simd = vminsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40705 transmute(src:r)
40706 }
40707}
40708
40709/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40711///
40712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
40713#[inline]
40714#[target_feature(enable = "avx512f")]
40715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40716#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40717#[rustc_legacy_const_generics(4)]
40718pub fn _mm_mask_min_round_sd<const SAE: i32>(
40719 src: __m128d,
40720 k: __mmask8,
40721 a: __m128d,
40722 b: __m128d,
40723) -> __m128d {
40724 unsafe {
40725 static_assert_sae!(SAE);
40726 let a: Simd = a.as_f64x2();
40727 let b: Simd = b.as_f64x2();
40728 let src: Simd = src.as_f64x2();
40729 let r: Simd = vminsd(a, b, src, mask:k, SAE);
40730 transmute(src:r)
40731 }
40732}
40733
40734/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40735/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40736///
40737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
40738#[inline]
40739#[target_feature(enable = "avx512f")]
40740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40741#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40742#[rustc_legacy_const_generics(3)]
40743pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40744 unsafe {
40745 static_assert_sae!(SAE);
40746 let a: Simd = a.as_f64x2();
40747 let b: Simd = b.as_f64x2();
40748 let r: Simd = vminsd(a, b, src:f64x2::ZERO, mask:k, SAE);
40749 transmute(src:r)
40750 }
40751}
40752
40753/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40754///
40755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40761///
40762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
40763#[inline]
40764#[target_feature(enable = "avx512f")]
40765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40766#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40767#[rustc_legacy_const_generics(2)]
40768pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40769 unsafe {
40770 static_assert_rounding!(ROUNDING);
40771 vsqrtss(a, b, src:_mm_setzero_ps(), mask:0b1, ROUNDING)
40772 }
40773}
40774
40775/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40776///
40777/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40778/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40779/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40780/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40781/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40782/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40783///
40784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
40785#[inline]
40786#[target_feature(enable = "avx512f")]
40787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40788#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40789#[rustc_legacy_const_generics(4)]
40790pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
40791 src: __m128,
40792 k: __mmask8,
40793 a: __m128,
40794 b: __m128,
40795) -> __m128 {
40796 unsafe {
40797 static_assert_rounding!(ROUNDING);
40798 vsqrtss(a, b, src, mask:k, ROUNDING)
40799 }
40800}
40801
40802/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40803///
40804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40810///
40811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
40812#[inline]
40813#[target_feature(enable = "avx512f")]
40814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40815#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40816#[rustc_legacy_const_generics(3)]
40817pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40818 unsafe {
40819 static_assert_rounding!(ROUNDING);
40820 vsqrtss(a, b, src:_mm_setzero_ps(), mask:k, ROUNDING)
40821 }
40822}
40823
40824/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40825///
40826/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40827/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40828/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40829/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40830/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40831/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40832///
40833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
40834#[inline]
40835#[target_feature(enable = "avx512f")]
40836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40837#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40838#[rustc_legacy_const_generics(2)]
40839pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40840 unsafe {
40841 static_assert_rounding!(ROUNDING);
40842 vsqrtsd(a, b, src:_mm_setzero_pd(), mask:0b1, ROUNDING)
40843 }
40844}
40845
40846/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40847///
40848/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40849/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40850/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40851/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40852/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40853/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40854///
40855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
40856#[inline]
40857#[target_feature(enable = "avx512f")]
40858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40859#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40860#[rustc_legacy_const_generics(4)]
40861pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
40862 src: __m128d,
40863 k: __mmask8,
40864 a: __m128d,
40865 b: __m128d,
40866) -> __m128d {
40867 unsafe {
40868 static_assert_rounding!(ROUNDING);
40869 vsqrtsd(a, b, src, mask:k, ROUNDING)
40870 }
40871}
40872
40873/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40874///
40875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40881///
40882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
40883#[inline]
40884#[target_feature(enable = "avx512f")]
40885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40886#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40887#[rustc_legacy_const_generics(3)]
40888pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
40889 k: __mmask8,
40890 a: __m128d,
40891 b: __m128d,
40892) -> __m128d {
40893 unsafe {
40894 static_assert_rounding!(ROUNDING);
40895 vsqrtsd(a, b, src:_mm_setzero_pd(), mask:k, ROUNDING)
40896 }
40897}
40898
40899/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40900/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40901///
40902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
40903#[inline]
40904#[target_feature(enable = "avx512f")]
40905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40906#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40907#[rustc_legacy_const_generics(2)]
40908pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40909 unsafe {
40910 static_assert_sae!(SAE);
40911 let a: Simd = a.as_f32x4();
40912 let b: Simd = b.as_f32x4();
40913 let r: Simd = vgetexpss(a, b, src:f32x4::ZERO, mask:0b1, SAE);
40914 transmute(src:r)
40915 }
40916}
40917
40918/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40926#[rustc_legacy_const_generics(4)]
40927pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
40928 src: __m128,
40929 k: __mmask8,
40930 a: __m128,
40931 b: __m128,
40932) -> __m128 {
40933 unsafe {
40934 static_assert_sae!(SAE);
40935 let a: Simd = a.as_f32x4();
40936 let b: Simd = b.as_f32x4();
40937 let src: Simd = src.as_f32x4();
40938 let r: Simd = vgetexpss(a, b, src, mask:k, SAE);
40939 transmute(src:r)
40940 }
40941}
40942
40943/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40944/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40945///
40946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
40947#[inline]
40948#[target_feature(enable = "avx512f")]
40949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40950#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40951#[rustc_legacy_const_generics(3)]
40952pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40953 unsafe {
40954 static_assert_sae!(SAE);
40955 let a: Simd = a.as_f32x4();
40956 let b: Simd = b.as_f32x4();
40957 let r: Simd = vgetexpss(a, b, src:f32x4::ZERO, mask:k, SAE);
40958 transmute(src:r)
40959 }
40960}
40961
40962/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40963/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40964///
40965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
40966#[inline]
40967#[target_feature(enable = "avx512f")]
40968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40969#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40970#[rustc_legacy_const_generics(2)]
40971pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40972 unsafe {
40973 static_assert_sae!(SAE);
40974 let a: Simd = a.as_f64x2();
40975 let b: Simd = b.as_f64x2();
40976 let r: Simd = vgetexpsd(a, b, src:f64x2::ZERO, mask:0b1, SAE);
40977 transmute(src:r)
40978 }
40979}
40980
40981/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40982/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40983///
40984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
40985#[inline]
40986#[target_feature(enable = "avx512f")]
40987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40988#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40989#[rustc_legacy_const_generics(4)]
40990pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
40991 src: __m128d,
40992 k: __mmask8,
40993 a: __m128d,
40994 b: __m128d,
40995) -> __m128d {
40996 unsafe {
40997 static_assert_sae!(SAE);
40998 let a: Simd = a.as_f64x2();
40999 let b: Simd = b.as_f64x2();
41000 let src: Simd = src.as_f64x2();
41001 let r: Simd = vgetexpsd(a, b, src, mask:k, SAE);
41002 transmute(src:r)
41003 }
41004}
41005
41006/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
41007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41008///
41009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
41010#[inline]
41011#[target_feature(enable = "avx512f")]
41012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41013#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
41014#[rustc_legacy_const_generics(3)]
41015pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
41016 unsafe {
41017 static_assert_sae!(SAE);
41018 let a: Simd = a.as_f64x2();
41019 let b: Simd = b.as_f64x2();
41020 let r: Simd = vgetexpsd(a, b, src:f64x2::ZERO, mask:k, SAE);
41021 transmute(src:r)
41022 }
41023}
41024
41025/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41026/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41027/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41028/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41029/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41030/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41031/// The sign is determined by sc which can take the following values:\
41032/// _MM_MANT_SIGN_src // sign = sign(src)\
41033/// _MM_MANT_SIGN_zero // sign = 0\
41034/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41035/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41036///
41037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
41038#[inline]
41039#[target_feature(enable = "avx512f")]
41040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41041#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41042#[rustc_legacy_const_generics(2, 3, 4)]
41043pub fn _mm_getmant_round_ss<
41044 const NORM: _MM_MANTISSA_NORM_ENUM,
41045 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41046 const SAE: i32,
41047>(
41048 a: __m128,
41049 b: __m128,
41050) -> __m128 {
41051 unsafe {
41052 static_assert_uimm_bits!(NORM, 4);
41053 static_assert_uimm_bits!(SIGN, 2);
41054 static_assert_mantissas_sae!(SAE);
41055 let a: Simd = a.as_f32x4();
41056 let b: Simd = b.as_f32x4();
41057 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b1, SAE);
41058 transmute(src:r)
41059 }
41060}
41061
41062/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41063/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41064/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41065/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41066/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41067/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41068/// The sign is determined by sc which can take the following values:\
41069/// _MM_MANT_SIGN_src // sign = sign(src)\
41070/// _MM_MANT_SIGN_zero // sign = 0\
41071/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41073///
41074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
41075#[inline]
41076#[target_feature(enable = "avx512f")]
41077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41078#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41079#[rustc_legacy_const_generics(4, 5, 6)]
41080pub fn _mm_mask_getmant_round_ss<
41081 const NORM: _MM_MANTISSA_NORM_ENUM,
41082 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41083 const SAE: i32,
41084>(
41085 src: __m128,
41086 k: __mmask8,
41087 a: __m128,
41088 b: __m128,
41089) -> __m128 {
41090 unsafe {
41091 static_assert_uimm_bits!(NORM, 4);
41092 static_assert_uimm_bits!(SIGN, 2);
41093 static_assert_mantissas_sae!(SAE);
41094 let a: Simd = a.as_f32x4();
41095 let b: Simd = b.as_f32x4();
41096 let src: Simd = src.as_f32x4();
41097 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src, m:k, SAE);
41098 transmute(src:r)
41099 }
41100}
41101
41102/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41103/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41104/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41105/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41106/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41107/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41108/// The sign is determined by sc which can take the following values:\
41109/// _MM_MANT_SIGN_src // sign = sign(src)\
41110/// _MM_MANT_SIGN_zero // sign = 0\
41111/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41112/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41113///
41114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
41115#[inline]
41116#[target_feature(enable = "avx512f")]
41117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41118#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41119#[rustc_legacy_const_generics(3, 4, 5)]
41120pub fn _mm_maskz_getmant_round_ss<
41121 const NORM: _MM_MANTISSA_NORM_ENUM,
41122 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41123 const SAE: i32,
41124>(
41125 k: __mmask8,
41126 a: __m128,
41127 b: __m128,
41128) -> __m128 {
41129 unsafe {
41130 static_assert_uimm_bits!(NORM, 4);
41131 static_assert_uimm_bits!(SIGN, 2);
41132 static_assert_mantissas_sae!(SAE);
41133 let a: Simd = a.as_f32x4();
41134 let b: Simd = b.as_f32x4();
41135 let r: Simd = vgetmantss(a, b, SIGN << 2 | NORM, src:f32x4::ZERO, m:k, SAE);
41136 transmute(src:r)
41137 }
41138}
41139
41140/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41141/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41142/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41143/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41144/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41145/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41146/// The sign is determined by sc which can take the following values:\
41147/// _MM_MANT_SIGN_src // sign = sign(src)\
41148/// _MM_MANT_SIGN_zero // sign = 0\
41149/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41151///
41152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
41153#[inline]
41154#[target_feature(enable = "avx512f")]
41155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41156#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41157#[rustc_legacy_const_generics(2, 3, 4)]
41158pub fn _mm_getmant_round_sd<
41159 const NORM: _MM_MANTISSA_NORM_ENUM,
41160 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41161 const SAE: i32,
41162>(
41163 a: __m128d,
41164 b: __m128d,
41165) -> __m128d {
41166 unsafe {
41167 static_assert_uimm_bits!(NORM, 4);
41168 static_assert_uimm_bits!(SIGN, 2);
41169 static_assert_mantissas_sae!(SAE);
41170 let a: Simd = a.as_f64x2();
41171 let b: Simd = b.as_f64x2();
41172 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b1, SAE);
41173 transmute(src:r)
41174 }
41175}
41176
41177/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41178/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41179/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41180/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41181/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41182/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41183/// The sign is determined by sc which can take the following values:\
41184/// _MM_MANT_SIGN_src // sign = sign(src)\
41185/// _MM_MANT_SIGN_zero // sign = 0\
41186/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41187/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41188///
41189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
41190#[inline]
41191#[target_feature(enable = "avx512f")]
41192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41193#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41194#[rustc_legacy_const_generics(4, 5, 6)]
41195pub fn _mm_mask_getmant_round_sd<
41196 const NORM: _MM_MANTISSA_NORM_ENUM,
41197 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41198 const SAE: i32,
41199>(
41200 src: __m128d,
41201 k: __mmask8,
41202 a: __m128d,
41203 b: __m128d,
41204) -> __m128d {
41205 unsafe {
41206 static_assert_uimm_bits!(NORM, 4);
41207 static_assert_uimm_bits!(SIGN, 2);
41208 static_assert_mantissas_sae!(SAE);
41209 let a: Simd = a.as_f64x2();
41210 let b: Simd = b.as_f64x2();
41211 let src: Simd = src.as_f64x2();
41212 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src, m:k, SAE);
41213 transmute(src:r)
41214 }
41215}
41216
41217/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41218/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41219/// _MM_MANT_NORM_1_2 // interval [1, 2)\
41220/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
41221/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
41222/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41223/// The sign is determined by sc which can take the following values:\
41224/// _MM_MANT_SIGN_src // sign = sign(src)\
41225/// _MM_MANT_SIGN_zero // sign = 0\
41226/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
41227/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41228///
41229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
41230#[inline]
41231#[target_feature(enable = "avx512f")]
41232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41233#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41234#[rustc_legacy_const_generics(3, 4, 5)]
41235pub fn _mm_maskz_getmant_round_sd<
41236 const NORM: _MM_MANTISSA_NORM_ENUM,
41237 const SIGN: _MM_MANTISSA_SIGN_ENUM,
41238 const SAE: i32,
41239>(
41240 k: __mmask8,
41241 a: __m128d,
41242 b: __m128d,
41243) -> __m128d {
41244 unsafe {
41245 static_assert_uimm_bits!(NORM, 4);
41246 static_assert_uimm_bits!(SIGN, 2);
41247 static_assert_mantissas_sae!(SAE);
41248 let a: Simd = a.as_f64x2();
41249 let b: Simd = b.as_f64x2();
41250 let r: Simd = vgetmantsd(a, b, SIGN << 2 | NORM, src:f64x2::ZERO, m:k, SAE);
41251 transmute(src:r)
41252 }
41253}
41254
41255/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41256/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41257/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41258/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41259/// * [`_MM_FROUND_TO_POS_INF`] : round up
41260/// * [`_MM_FROUND_TO_ZERO`] : truncate
41261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41262///
41263/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
41265#[inline]
41266#[target_feature(enable = "avx512f")]
41267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41268#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41269#[rustc_legacy_const_generics(2, 3)]
41270pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
41271 unsafe {
41272 static_assert_uimm_bits!(IMM8, 8);
41273 static_assert_mantissas_sae!(SAE);
41274 let a: Simd = a.as_f32x4();
41275 let b: Simd = b.as_f32x4();
41276 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:0b11111111, IMM8, SAE);
41277 transmute(src:r)
41278 }
41279}
41280
41281/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41282/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41283/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41284/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41285/// * [`_MM_FROUND_TO_POS_INF`] : round up
41286/// * [`_MM_FROUND_TO_ZERO`] : truncate
41287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41288///
41289/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
41291#[inline]
41292#[target_feature(enable = "avx512f")]
41293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41294#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41295#[rustc_legacy_const_generics(4, 5)]
41296pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41297 src: __m128,
41298 k: __mmask8,
41299 a: __m128,
41300 b: __m128,
41301) -> __m128 {
41302 unsafe {
41303 static_assert_uimm_bits!(IMM8, 8);
41304 static_assert_mantissas_sae!(SAE);
41305 let a: Simd = a.as_f32x4();
41306 let b: Simd = b.as_f32x4();
41307 let src: Simd = src.as_f32x4();
41308 let r: Simd = vrndscaless(a, b, src, mask:k, IMM8, SAE);
41309 transmute(src:r)
41310 }
41311}
41312
41313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41317/// * [`_MM_FROUND_TO_POS_INF`] : round up
41318/// * [`_MM_FROUND_TO_ZERO`] : truncate
41319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41320///
41321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
41323#[inline]
41324#[target_feature(enable = "avx512f")]
41325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41327#[rustc_legacy_const_generics(3, 4)]
41328pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41329 k: __mmask8,
41330 a: __m128,
41331 b: __m128,
41332) -> __m128 {
41333 unsafe {
41334 static_assert_uimm_bits!(IMM8, 8);
41335 static_assert_mantissas_sae!(SAE);
41336 let a: Simd = a.as_f32x4();
41337 let b: Simd = b.as_f32x4();
41338 let r: Simd = vrndscaless(a, b, src:f32x4::ZERO, mask:k, IMM8, SAE);
41339 transmute(src:r)
41340 }
41341}
41342
41343/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41344/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41345/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41346/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41347/// * [`_MM_FROUND_TO_POS_INF`] : round up
41348/// * [`_MM_FROUND_TO_ZERO`] : truncate
41349/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41350///
41351/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
41353#[inline]
41354#[target_feature(enable = "avx512f")]
41355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41356#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41357#[rustc_legacy_const_generics(2, 3)]
41358pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
41359 unsafe {
41360 static_assert_uimm_bits!(IMM8, 8);
41361 static_assert_mantissas_sae!(SAE);
41362 let a: Simd = a.as_f64x2();
41363 let b: Simd = b.as_f64x2();
41364 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:0b11111111, IMM8, SAE);
41365 transmute(src:r)
41366 }
41367}
41368
41369/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41370/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41371/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41372/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41373/// * [`_MM_FROUND_TO_POS_INF`] : round up
41374/// * [`_MM_FROUND_TO_ZERO`] : truncate
41375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41376///
41377/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
41379#[inline]
41380#[target_feature(enable = "avx512f")]
41381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41382#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41383#[rustc_legacy_const_generics(4, 5)]
41384pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41385 src: __m128d,
41386 k: __mmask8,
41387 a: __m128d,
41388 b: __m128d,
41389) -> __m128d {
41390 unsafe {
41391 static_assert_uimm_bits!(IMM8, 8);
41392 static_assert_mantissas_sae!(SAE);
41393 let a: Simd = a.as_f64x2();
41394 let b: Simd = b.as_f64x2();
41395 let src: Simd = src.as_f64x2();
41396 let r: Simd = vrndscalesd(a, b, src, mask:k, IMM8, SAE);
41397 transmute(src:r)
41398 }
41399}
41400
41401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41405/// * [`_MM_FROUND_TO_POS_INF`] : round up
41406/// * [`_MM_FROUND_TO_ZERO`] : truncate
41407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41408///
41409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
41411#[inline]
41412#[target_feature(enable = "avx512f")]
41413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41415#[rustc_legacy_const_generics(3, 4)]
41416pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41417 k: __mmask8,
41418 a: __m128d,
41419 b: __m128d,
41420) -> __m128d {
41421 unsafe {
41422 static_assert_uimm_bits!(IMM8, 8);
41423 static_assert_mantissas_sae!(SAE);
41424 let a: Simd = a.as_f64x2();
41425 let b: Simd = b.as_f64x2();
41426 let r: Simd = vrndscalesd(a, b, src:f64x2::ZERO, mask:k, IMM8, SAE);
41427 transmute(src:r)
41428 }
41429}
41430
41431/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41432///
41433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41439///
41440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
41441#[inline]
41442#[target_feature(enable = "avx512f")]
41443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41444#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41445#[rustc_legacy_const_generics(2)]
41446pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
41447 unsafe {
41448 static_assert_rounding!(ROUNDING);
41449 let a: Simd = a.as_f32x4();
41450 let b: Simd = b.as_f32x4();
41451 let r: Simd = vscalefss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
41452 transmute(src:r)
41453 }
41454}
41455
41456/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41457///
41458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41464///
41465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
41466#[inline]
41467#[target_feature(enable = "avx512f")]
41468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41469#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41470#[rustc_legacy_const_generics(4)]
41471pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
41472 src: __m128,
41473 k: __mmask8,
41474 a: __m128,
41475 b: __m128,
41476) -> __m128 {
41477 unsafe {
41478 static_assert_rounding!(ROUNDING);
41479 let a: Simd = a.as_f32x4();
41480 let b: Simd = b.as_f32x4();
41481 let src: Simd = src.as_f32x4();
41482 let r: Simd = vscalefss(a, b, src, mask:k, ROUNDING);
41483 transmute(src:r)
41484 }
41485}
41486
41487/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41488///
41489/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41490/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41491/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41492/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41493/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41494/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41495///
41496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
41497#[inline]
41498#[target_feature(enable = "avx512f")]
41499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41500#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
41501#[rustc_legacy_const_generics(3)]
41502pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
41503 unsafe {
41504 static_assert_rounding!(ROUNDING);
41505 let a: Simd = a.as_f32x4();
41506 let b: Simd = b.as_f32x4();
41507 let r: Simd = vscalefss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
41508 transmute(src:r)
41509 }
41510}
41511
41512/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41513///
41514/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41515/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41516/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41517/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41518/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41519/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41520///
41521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
41522#[inline]
41523#[target_feature(enable = "avx512f")]
41524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41525#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41526#[rustc_legacy_const_generics(2)]
41527pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
41528 unsafe {
41529 static_assert_rounding!(ROUNDING);
41530 let a: Simd = a.as_f64x2();
41531 let b: Simd = b.as_f64x2();
41532 let r: Simd = vscalefsd(a, b, src:f64x2::ZERO, mask:0b11111111, ROUNDING);
41533 transmute(src:r)
41534 }
41535}
41536
41537/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41538///
41539/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41540/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41541/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41542/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41543/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41544/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41545///
41546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
41547#[inline]
41548#[target_feature(enable = "avx512f")]
41549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41550#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41551#[rustc_legacy_const_generics(4)]
41552pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
41553 src: __m128d,
41554 k: __mmask8,
41555 a: __m128d,
41556 b: __m128d,
41557) -> __m128d {
41558 unsafe {
41559 let a: Simd = a.as_f64x2();
41560 let b: Simd = b.as_f64x2();
41561 let src: Simd = src.as_f64x2();
41562 let r: Simd = vscalefsd(a, b, src, mask:k, ROUNDING);
41563 transmute(src:r)
41564 }
41565}
41566
41567/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41568///
41569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41575///
41576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
41577#[inline]
41578#[target_feature(enable = "avx512f")]
41579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41580#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
41581#[rustc_legacy_const_generics(3)]
41582pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
41583 k: __mmask8,
41584 a: __m128d,
41585 b: __m128d,
41586) -> __m128d {
41587 unsafe {
41588 static_assert_rounding!(ROUNDING);
41589 let a: Simd = a.as_f64x2();
41590 let b: Simd = b.as_f64x2();
41591 let r: Simd = vscalefsd(a, b, src:f64x2::ZERO, mask:k, ROUNDING);
41592 transmute(src:r)
41593 }
41594}
41595
41596/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41597///
41598/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41599/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41600/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41601/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41602/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41603/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41604///
41605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
41606#[inline]
41607#[target_feature(enable = "avx512f")]
41608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41609#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41610#[rustc_legacy_const_generics(3)]
41611pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41612 unsafe {
41613 static_assert_rounding!(ROUNDING);
41614 let extracta: f32 = simd_extract!(a, 0);
41615 let extractb: f32 = simd_extract!(b, 0);
41616 let extractc: f32 = simd_extract!(c, 0);
41617 let r: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41618 simd_insert!(a, 0, r)
41619 }
41620}
41621
41622/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41623///
41624/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41625/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41626/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41627/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41628/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41630///
41631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
41632#[inline]
41633#[target_feature(enable = "avx512f")]
41634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41635#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41636#[rustc_legacy_const_generics(4)]
41637pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
41638 a: __m128,
41639 k: __mmask8,
41640 b: __m128,
41641 c: __m128,
41642) -> __m128 {
41643 unsafe {
41644 static_assert_rounding!(ROUNDING);
41645 let mut fmadd: f32 = simd_extract!(a, 0);
41646 if (k & 0b00000001) != 0 {
41647 let extractb: f32 = simd_extract!(b, 0);
41648 let extractc: f32 = simd_extract!(c, 0);
41649 fmadd = vfmaddssround(a:fmadd, b:extractb, c:extractc, ROUNDING);
41650 }
41651 simd_insert!(a, 0, fmadd)
41652 }
41653}
41654
41655/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41656///
41657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41663///
41664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
41665#[inline]
41666#[target_feature(enable = "avx512f")]
41667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41668#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41669#[rustc_legacy_const_generics(4)]
41670pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
41671 k: __mmask8,
41672 a: __m128,
41673 b: __m128,
41674 c: __m128,
41675) -> __m128 {
41676 unsafe {
41677 static_assert_rounding!(ROUNDING);
41678 let mut fmadd: f32 = 0.;
41679 if (k & 0b00000001) != 0 {
41680 let extracta: f32 = simd_extract!(a, 0);
41681 let extractb: f32 = simd_extract!(b, 0);
41682 let extractc: f32 = simd_extract!(c, 0);
41683 fmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41684 }
41685 simd_insert!(a, 0, fmadd)
41686 }
41687}
41688
41689/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41690///
41691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41697///
41698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
41699#[inline]
41700#[target_feature(enable = "avx512f")]
41701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41702#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41703#[rustc_legacy_const_generics(4)]
41704pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
41705 a: __m128,
41706 b: __m128,
41707 c: __m128,
41708 k: __mmask8,
41709) -> __m128 {
41710 unsafe {
41711 static_assert_rounding!(ROUNDING);
41712 let mut fmadd: f32 = simd_extract!(c, 0);
41713 if (k & 0b00000001) != 0 {
41714 let extracta: f32 = simd_extract!(a, 0);
41715 let extractb: f32 = simd_extract!(b, 0);
41716 fmadd = vfmaddssround(a:extracta, b:extractb, c:fmadd, ROUNDING);
41717 }
41718 simd_insert!(c, 0, fmadd)
41719 }
41720}
41721
41722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41723///
41724/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41725/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41726/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41727/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41728/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41729/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41730///
41731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
41732#[inline]
41733#[target_feature(enable = "avx512f")]
41734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41735#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41736#[rustc_legacy_const_generics(3)]
41737pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41738 unsafe {
41739 static_assert_rounding!(ROUNDING);
41740 let extracta: f64 = simd_extract!(a, 0);
41741 let extractb: f64 = simd_extract!(b, 0);
41742 let extractc: f64 = simd_extract!(c, 0);
41743 let fmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
41744 simd_insert!(a, 0, fmadd)
41745 }
41746}
41747
41748/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41749///
41750/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41751/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41752/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41753/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41754/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41755/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41756///
41757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
41758#[inline]
41759#[target_feature(enable = "avx512f")]
41760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41761#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41762#[rustc_legacy_const_generics(4)]
41763pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
41764 a: __m128d,
41765 k: __mmask8,
41766 b: __m128d,
41767 c: __m128d,
41768) -> __m128d {
41769 unsafe {
41770 static_assert_rounding!(ROUNDING);
41771 let mut fmadd: f64 = simd_extract!(a, 0);
41772 if (k & 0b00000001) != 0 {
41773 let extractb: f64 = simd_extract!(b, 0);
41774 let extractc: f64 = simd_extract!(c, 0);
41775 fmadd = vfmaddsdround(a:fmadd, b:extractb, c:extractc, ROUNDING);
41776 }
41777 simd_insert!(a, 0, fmadd)
41778 }
41779}
41780
41781/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41782///
41783/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41784/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41785/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41786/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41787/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41788/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41789///
41790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
41791#[inline]
41792#[target_feature(enable = "avx512f")]
41793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41794#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41795#[rustc_legacy_const_generics(4)]
41796pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
41797 k: __mmask8,
41798 a: __m128d,
41799 b: __m128d,
41800 c: __m128d,
41801) -> __m128d {
41802 unsafe {
41803 static_assert_rounding!(ROUNDING);
41804 let mut fmadd: f64 = 0.;
41805 if (k & 0b00000001) != 0 {
41806 let extracta: f64 = simd_extract!(a, 0);
41807 let extractb: f64 = simd_extract!(b, 0);
41808 let extractc: f64 = simd_extract!(c, 0);
41809 fmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
41810 }
41811 simd_insert!(a, 0, fmadd)
41812 }
41813}
41814
41815/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
41816///
41817/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41818/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41819/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41820/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41821/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41822/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41823///
41824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
41825#[inline]
41826#[target_feature(enable = "avx512f")]
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
41829#[rustc_legacy_const_generics(4)]
41830pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
41831 a: __m128d,
41832 b: __m128d,
41833 c: __m128d,
41834 k: __mmask8,
41835) -> __m128d {
41836 unsafe {
41837 static_assert_rounding!(ROUNDING);
41838 let mut fmadd: f64 = simd_extract!(c, 0);
41839 if (k & 0b00000001) != 0 {
41840 let extracta: f64 = simd_extract!(a, 0);
41841 let extractb: f64 = simd_extract!(b, 0);
41842 fmadd = vfmaddsdround(a:extracta, b:extractb, c:fmadd, ROUNDING);
41843 }
41844 simd_insert!(c, 0, fmadd)
41845 }
41846}
41847
41848/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41849///
41850/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41851/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41852/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41853/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41854/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41855/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41856///
41857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
41858#[inline]
41859#[target_feature(enable = "avx512f")]
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41862#[rustc_legacy_const_generics(3)]
41863pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
41864 unsafe {
41865 static_assert_rounding!(ROUNDING);
41866 let extracta: f32 = simd_extract!(a, 0);
41867 let extractb: f32 = simd_extract!(b, 0);
41868 let extractc: f32 = simd_extract!(c, 0);
41869 let extractc: f32 = -extractc;
41870 let fmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41871 simd_insert!(a, 0, fmsub)
41872 }
41873}
41874
41875/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41876///
41877/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41878/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41879/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41880/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41881/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41882/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41883///
41884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
41885#[inline]
41886#[target_feature(enable = "avx512f")]
41887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41888#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41889#[rustc_legacy_const_generics(4)]
41890pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
41891 a: __m128,
41892 k: __mmask8,
41893 b: __m128,
41894 c: __m128,
41895) -> __m128 {
41896 unsafe {
41897 static_assert_rounding!(ROUNDING);
41898 let mut fmsub: f32 = simd_extract!(a, 0);
41899 if (k & 0b00000001) != 0 {
41900 let extractb: f32 = simd_extract!(b, 0);
41901 let extractc: f32 = simd_extract!(c, 0);
41902 let extractc: f32 = -extractc;
41903 fmsub = vfmaddssround(a:fmsub, b:extractb, c:extractc, ROUNDING);
41904 }
41905 simd_insert!(a, 0, fmsub)
41906 }
41907}
41908
41909/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41910///
41911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41917///
41918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
41919#[inline]
41920#[target_feature(enable = "avx512f")]
41921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41922#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41923#[rustc_legacy_const_generics(4)]
41924pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
41925 k: __mmask8,
41926 a: __m128,
41927 b: __m128,
41928 c: __m128,
41929) -> __m128 {
41930 unsafe {
41931 static_assert_rounding!(ROUNDING);
41932 let mut fmsub: f32 = 0.;
41933 if (k & 0b00000001) != 0 {
41934 let extracta: f32 = simd_extract!(a, 0);
41935 let extractb: f32 = simd_extract!(b, 0);
41936 let extractc: f32 = simd_extract!(c, 0);
41937 let extractc: f32 = -extractc;
41938 fmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41939 }
41940 simd_insert!(a, 0, fmsub)
41941 }
41942}
41943
41944/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
41945///
41946/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41947/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41948/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41949/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41950/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41951/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41952///
41953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
41954#[inline]
41955#[target_feature(enable = "avx512f")]
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41958#[rustc_legacy_const_generics(4)]
41959pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
41960 a: __m128,
41961 b: __m128,
41962 c: __m128,
41963 k: __mmask8,
41964) -> __m128 {
41965 unsafe {
41966 static_assert_rounding!(ROUNDING);
41967 let mut fmsub: f32 = simd_extract!(c, 0);
41968 if (k & 0b00000001) != 0 {
41969 let extracta: f32 = simd_extract!(a, 0);
41970 let extractb: f32 = simd_extract!(b, 0);
41971 let extractc: f32 = -fmsub;
41972 fmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
41973 }
41974 simd_insert!(c, 0, fmsub)
41975 }
41976}
41977
41978/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41979///
41980/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41981/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41982/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41983/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41984/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41985/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41986///
41987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
41988#[inline]
41989#[target_feature(enable = "avx512f")]
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
41992#[rustc_legacy_const_generics(3)]
41993pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
41994 unsafe {
41995 static_assert_rounding!(ROUNDING);
41996 let extracta: f64 = simd_extract!(a, 0);
41997 let extractb: f64 = simd_extract!(b, 0);
41998 let extractc: f64 = simd_extract!(c, 0);
41999 let extractc: f64 = -extractc;
42000 let fmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42001 simd_insert!(a, 0, fmsub)
42002 }
42003}
42004
42005/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42006///
42007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42013///
42014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
42015#[inline]
42016#[target_feature(enable = "avx512f")]
42017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42018#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42019#[rustc_legacy_const_generics(4)]
42020pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
42021 a: __m128d,
42022 k: __mmask8,
42023 b: __m128d,
42024 c: __m128d,
42025) -> __m128d {
42026 unsafe {
42027 static_assert_rounding!(ROUNDING);
42028 let mut fmsub: f64 = simd_extract!(a, 0);
42029 if (k & 0b00000001) != 0 {
42030 let extractb: f64 = simd_extract!(b, 0);
42031 let extractc: f64 = simd_extract!(c, 0);
42032 let extractc: f64 = -extractc;
42033 fmsub = vfmaddsdround(a:fmsub, b:extractb, c:extractc, ROUNDING);
42034 }
42035 simd_insert!(a, 0, fmsub)
42036 }
42037}
42038
42039/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42040///
42041/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42042/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42043/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42044/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42045/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42046/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42047///
42048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
42049#[inline]
42050#[target_feature(enable = "avx512f")]
42051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42052#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42053#[rustc_legacy_const_generics(4)]
42054pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
42055 k: __mmask8,
42056 a: __m128d,
42057 b: __m128d,
42058 c: __m128d,
42059) -> __m128d {
42060 unsafe {
42061 static_assert_rounding!(ROUNDING);
42062 let mut fmsub: f64 = 0.;
42063 if (k & 0b00000001) != 0 {
42064 let extracta: f64 = simd_extract!(a, 0);
42065 let extractb: f64 = simd_extract!(b, 0);
42066 let extractc: f64 = simd_extract!(c, 0);
42067 let extractc: f64 = -extractc;
42068 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42069 }
42070 simd_insert!(a, 0, fmsub)
42071 }
42072}
42073
42074/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42075///
42076/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42077/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42078/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42079/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42080/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42081/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42082///
42083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
42084#[inline]
42085#[target_feature(enable = "avx512f")]
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
42088#[rustc_legacy_const_generics(4)]
42089pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
42090 a: __m128d,
42091 b: __m128d,
42092 c: __m128d,
42093 k: __mmask8,
42094) -> __m128d {
42095 unsafe {
42096 static_assert_rounding!(ROUNDING);
42097 let mut fmsub: f64 = simd_extract!(c, 0);
42098 if (k & 0b00000001) != 0 {
42099 let extracta: f64 = simd_extract!(a, 0);
42100 let extractb: f64 = simd_extract!(b, 0);
42101 let extractc: f64 = -fmsub;
42102 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42103 }
42104 simd_insert!(c, 0, fmsub)
42105 }
42106}
42107
42108/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42109///
42110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42111/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42112/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42113/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42114/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42115/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42116///
42117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
42118#[inline]
42119#[target_feature(enable = "avx512f")]
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42122#[rustc_legacy_const_generics(3)]
42123pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42124 unsafe {
42125 static_assert_rounding!(ROUNDING);
42126 let extracta: f32 = simd_extract!(a, 0);
42127 let extracta: f32 = -extracta;
42128 let extractb: f32 = simd_extract!(b, 0);
42129 let extractc: f32 = simd_extract!(c, 0);
42130 let fnmadd: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42131 simd_insert!(a, 0, fnmadd)
42132 }
42133}
42134
42135/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42136///
42137/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42138/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42139/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42140/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42141/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42142/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42143///
42144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
42145#[inline]
42146#[target_feature(enable = "avx512f")]
42147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42148#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42149#[rustc_legacy_const_generics(4)]
42150pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
42151 a: __m128,
42152 k: __mmask8,
42153 b: __m128,
42154 c: __m128,
42155) -> __m128 {
42156 unsafe {
42157 static_assert_rounding!(ROUNDING);
42158 let mut fnmadd: f32 = simd_extract!(a, 0);
42159 if (k & 0b00000001) != 0 {
42160 let extracta: f32 = -fnmadd;
42161 let extractb: f32 = simd_extract!(b, 0);
42162 let extractc: f32 = simd_extract!(c, 0);
42163 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42164 }
42165 simd_insert!(a, 0, fnmadd)
42166 }
42167}
42168
42169/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42170///
42171/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42172/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42173/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42174/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42175/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42176/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42177///
42178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
42179#[inline]
42180#[target_feature(enable = "avx512f")]
42181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42182#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42183#[rustc_legacy_const_generics(4)]
42184pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
42185 k: __mmask8,
42186 a: __m128,
42187 b: __m128,
42188 c: __m128,
42189) -> __m128 {
42190 unsafe {
42191 static_assert_rounding!(ROUNDING);
42192 let mut fnmadd: f32 = 0.;
42193 if (k & 0b00000001) != 0 {
42194 let extracta: f32 = simd_extract!(a, 0);
42195 let extracta: f32 = -extracta;
42196 let extractb: f32 = simd_extract!(b, 0);
42197 let extractc: f32 = simd_extract!(c, 0);
42198 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42199 }
42200 simd_insert!(a, 0, fnmadd)
42201 }
42202}
42203
42204/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42205///
42206/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42207/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42208/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42209/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42210/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42211/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42212///
42213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
42214#[inline]
42215#[target_feature(enable = "avx512f")]
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42218#[rustc_legacy_const_generics(4)]
42219pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
42220 a: __m128,
42221 b: __m128,
42222 c: __m128,
42223 k: __mmask8,
42224) -> __m128 {
42225 unsafe {
42226 static_assert_rounding!(ROUNDING);
42227 let mut fnmadd: f32 = simd_extract!(c, 0);
42228 if (k & 0b00000001) != 0 {
42229 let extracta: f32 = simd_extract!(a, 0);
42230 let extracta: f32 = -extracta;
42231 let extractb: f32 = simd_extract!(b, 0);
42232 fnmadd = vfmaddssround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
42233 }
42234 simd_insert!(c, 0, fnmadd)
42235 }
42236}
42237
42238/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42239///
42240/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42241/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42242/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42243/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42244/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42245/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42246///
42247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
42248#[inline]
42249#[target_feature(enable = "avx512f")]
42250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42251#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42252#[rustc_legacy_const_generics(3)]
42253pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42254 unsafe {
42255 static_assert_rounding!(ROUNDING);
42256 let extracta: f64 = simd_extract!(a, 0);
42257 let extracta: f64 = -extracta;
42258 let extractb: f64 = simd_extract!(b, 0);
42259 let extractc: f64 = simd_extract!(c, 0);
42260 let fnmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42261 simd_insert!(a, 0, fnmadd)
42262 }
42263}
42264
42265/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42266///
42267/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42268/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42269/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42270/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42271/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42272/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42273///
42274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
42275#[inline]
42276#[target_feature(enable = "avx512f")]
42277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42278#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42279#[rustc_legacy_const_generics(4)]
42280pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
42281 a: __m128d,
42282 k: __mmask8,
42283 b: __m128d,
42284 c: __m128d,
42285) -> __m128d {
42286 unsafe {
42287 static_assert_rounding!(ROUNDING);
42288 let mut fnmadd: f64 = simd_extract!(a, 0);
42289 if (k & 0b00000001) != 0 {
42290 let extracta: f64 = -fnmadd;
42291 let extractb: f64 = simd_extract!(b, 0);
42292 let extractc: f64 = simd_extract!(c, 0);
42293 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42294 }
42295 simd_insert!(a, 0, fnmadd)
42296 }
42297}
42298
42299/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42300///
42301/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42302/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42303/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42304/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42305/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42306/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42307///
42308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
42309#[inline]
42310#[target_feature(enable = "avx512f")]
42311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42312#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42313#[rustc_legacy_const_generics(4)]
42314pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
42315 k: __mmask8,
42316 a: __m128d,
42317 b: __m128d,
42318 c: __m128d,
42319) -> __m128d {
42320 unsafe {
42321 static_assert_rounding!(ROUNDING);
42322 let mut fnmadd: f64 = 0.;
42323 if (k & 0b00000001) != 0 {
42324 let extracta: f64 = simd_extract!(a, 0);
42325 let extracta: f64 = -extracta;
42326 let extractb: f64 = simd_extract!(b, 0);
42327 let extractc: f64 = simd_extract!(c, 0);
42328 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42329 }
42330 simd_insert!(a, 0, fnmadd)
42331 }
42332}
42333
42334/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42335///
42336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42342///
42343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
42344#[inline]
42345#[target_feature(enable = "avx512f")]
42346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
42348#[rustc_legacy_const_generics(4)]
42349pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
42350 a: __m128d,
42351 b: __m128d,
42352 c: __m128d,
42353 k: __mmask8,
42354) -> __m128d {
42355 unsafe {
42356 static_assert_rounding!(ROUNDING);
42357 let mut fnmadd: f64 = simd_extract!(c, 0);
42358 if (k & 0b00000001) != 0 {
42359 let extracta: f64 = simd_extract!(a, 0);
42360 let extracta: f64 = -extracta;
42361 let extractb: f64 = simd_extract!(b, 0);
42362 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
42363 }
42364 simd_insert!(c, 0, fnmadd)
42365 }
42366}
42367
42368/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
42369///
42370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42376///
42377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
42378#[inline]
42379#[target_feature(enable = "avx512f")]
42380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42381#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42382#[rustc_legacy_const_generics(3)]
42383pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42384 unsafe {
42385 static_assert_rounding!(ROUNDING);
42386 let extracta: f32 = simd_extract!(a, 0);
42387 let extracta: f32 = -extracta;
42388 let extractb: f32 = simd_extract!(b, 0);
42389 let extractc: f32 = simd_extract!(c, 0);
42390 let extractc: f32 = -extractc;
42391 let fnmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42392 simd_insert!(a, 0, fnmsub)
42393 }
42394}
42395
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42397///
42398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42399/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42400/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42401/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42402/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42404///
42405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
42406#[inline]
42407#[target_feature(enable = "avx512f")]
42408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42409#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42410#[rustc_legacy_const_generics(4)]
42411pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
42412 a: __m128,
42413 k: __mmask8,
42414 b: __m128,
42415 c: __m128,
42416) -> __m128 {
42417 unsafe {
42418 static_assert_rounding!(ROUNDING);
42419 let mut fnmsub: f32 = simd_extract!(a, 0);
42420 if (k & 0b00000001) != 0 {
42421 let extracta: f32 = -fnmsub;
42422 let extractb: f32 = simd_extract!(b, 0);
42423 let extractc: f32 = simd_extract!(c, 0);
42424 let extractc: f32 = -extractc;
42425 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42426 }
42427 simd_insert!(a, 0, fnmsub)
42428 }
42429}
42430
42431/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42432///
42433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42439///
42440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
42441#[inline]
42442#[target_feature(enable = "avx512f")]
42443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42444#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42445#[rustc_legacy_const_generics(4)]
42446pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
42447 k: __mmask8,
42448 a: __m128,
42449 b: __m128,
42450 c: __m128,
42451) -> __m128 {
42452 unsafe {
42453 static_assert_rounding!(ROUNDING);
42454 let mut fnmsub: f32 = 0.;
42455 if (k & 0b00000001) != 0 {
42456 let extracta: f32 = simd_extract!(a, 0);
42457 let extracta: f32 = -extracta;
42458 let extractb: f32 = simd_extract!(b, 0);
42459 let extractc: f32 = simd_extract!(c, 0);
42460 let extractc: f32 = -extractc;
42461 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42462 }
42463 simd_insert!(a, 0, fnmsub)
42464 }
42465}
42466
42467/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42468///
42469/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42475///
42476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
42477#[inline]
42478#[target_feature(enable = "avx512f")]
42479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42480#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42481#[rustc_legacy_const_generics(4)]
42482pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
42483 a: __m128,
42484 b: __m128,
42485 c: __m128,
42486 k: __mmask8,
42487) -> __m128 {
42488 unsafe {
42489 static_assert_rounding!(ROUNDING);
42490 let mut fnmsub: f32 = simd_extract!(c, 0);
42491 if (k & 0b00000001) != 0 {
42492 let extracta: f32 = simd_extract!(a, 0);
42493 let extracta: f32 = -extracta;
42494 let extractb: f32 = simd_extract!(b, 0);
42495 let extractc: f32 = -fnmsub;
42496 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
42497 }
42498 simd_insert!(c, 0, fnmsub)
42499 }
42500}
42501
42502/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42503///
42504/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42505/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42506/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42507/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42508/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42509/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42510///
42511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
42512#[inline]
42513#[target_feature(enable = "avx512f")]
42514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42515#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42516#[rustc_legacy_const_generics(3)]
42517pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42518 unsafe {
42519 static_assert_rounding!(ROUNDING);
42520 let extracta: f64 = simd_extract!(a, 0);
42521 let extracta: f64 = -extracta;
42522 let extractb: f64 = simd_extract!(b, 0);
42523 let extractc: f64 = simd_extract!(c, 0);
42524 let extractc: f64 = -extractc;
42525 let fnmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42526 simd_insert!(a, 0, fnmsub)
42527 }
42528}
42529
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42531///
42532/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42533/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42534/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42535/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42536/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42537/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42538///
42539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
42540#[inline]
42541#[target_feature(enable = "avx512f")]
42542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42543#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42544#[rustc_legacy_const_generics(4)]
42545pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
42546 a: __m128d,
42547 k: __mmask8,
42548 b: __m128d,
42549 c: __m128d,
42550) -> __m128d {
42551 unsafe {
42552 static_assert_rounding!(ROUNDING);
42553 let mut fnmsub: f64 = simd_extract!(a, 0);
42554 if (k & 0b00000001) != 0 {
42555 let extracta: f64 = -fnmsub;
42556 let extractb: f64 = simd_extract!(b, 0);
42557 let extractc: f64 = simd_extract!(c, 0);
42558 let extractc: f64 = -extractc;
42559 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42560 }
42561 simd_insert!(a, 0, fnmsub)
42562 }
42563}
42564
42565/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42566///
42567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42573///
42574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
42575#[inline]
42576#[target_feature(enable = "avx512f")]
42577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42578#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42579#[rustc_legacy_const_generics(4)]
42580pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
42581 k: __mmask8,
42582 a: __m128d,
42583 b: __m128d,
42584 c: __m128d,
42585) -> __m128d {
42586 unsafe {
42587 static_assert_rounding!(ROUNDING);
42588 let mut fnmsub: f64 = 0.;
42589 if (k & 0b00000001) != 0 {
42590 let extracta: f64 = simd_extract!(a, 0);
42591 let extracta: f64 = -extracta;
42592 let extractb: f64 = simd_extract!(b, 0);
42593 let extractc: f64 = simd_extract!(c, 0);
42594 let extractc: f64 = -extractc;
42595 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42596 }
42597 simd_insert!(a, 0, fnmsub)
42598 }
42599}
42600
42601/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42602///
42603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42609///
42610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
42611#[inline]
42612#[target_feature(enable = "avx512f")]
42613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42614#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42615#[rustc_legacy_const_generics(4)]
42616pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
42617 a: __m128d,
42618 b: __m128d,
42619 c: __m128d,
42620 k: __mmask8,
42621) -> __m128d {
42622 unsafe {
42623 static_assert_rounding!(ROUNDING);
42624 let mut fnmsub: f64 = simd_extract!(c, 0);
42625 if (k & 0b00000001) != 0 {
42626 let extracta: f64 = simd_extract!(a, 0);
42627 let extracta: f64 = -extracta;
42628 let extractb: f64 = simd_extract!(b, 0);
42629 let extractc: f64 = -fnmsub;
42630 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
42631 }
42632 simd_insert!(c, 0, fnmsub)
42633 }
42634}
42635
42636/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42637///
42638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
42639#[inline]
42640#[target_feature(enable = "avx512f")]
42641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42642#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42643#[rustc_legacy_const_generics(3)]
42644pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
42645 unsafe {
42646 static_assert_uimm_bits!(IMM8, 8);
42647 let a: Simd = a.as_f32x4();
42648 let b: Simd = b.as_f32x4();
42649 let c: Simd = c.as_i32x4();
42650 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
42651 let fixupimm: f32 = simd_extract!(r, 0);
42652 let r: Simd = simd_insert!(a, 0, fixupimm);
42653 transmute(src:r)
42654 }
42655}
42656
42657/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42658///
42659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
42660#[inline]
42661#[target_feature(enable = "avx512f")]
42662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42663#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42664#[rustc_legacy_const_generics(4)]
42665pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
42666 a: __m128,
42667 k: __mmask8,
42668 b: __m128,
42669 c: __m128i,
42670) -> __m128 {
42671 unsafe {
42672 static_assert_uimm_bits!(IMM8, 8);
42673 let a: Simd = a.as_f32x4();
42674 let b: Simd = b.as_f32x4();
42675 let c: Simd = c.as_i32x4();
42676 let fixupimm: Simd = vfixupimmss(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42677 let fixupimm: f32 = simd_extract!(fixupimm, 0);
42678 let r: Simd = simd_insert!(a, 0, fixupimm);
42679 transmute(src:r)
42680 }
42681}
42682
42683/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42684///
42685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
42686#[inline]
42687#[target_feature(enable = "avx512f")]
42688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42689#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42690#[rustc_legacy_const_generics(4)]
42691pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
42692 k: __mmask8,
42693 a: __m128,
42694 b: __m128,
42695 c: __m128i,
42696) -> __m128 {
42697 unsafe {
42698 static_assert_uimm_bits!(IMM8, 8);
42699 let a: Simd = a.as_f32x4();
42700 let b: Simd = b.as_f32x4();
42701 let c: Simd = c.as_i32x4();
42702 let fixupimm: Simd = vfixupimmssz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42703 let fixupimm: f32 = simd_extract!(fixupimm, 0);
42704 let r: Simd = simd_insert!(a, 0, fixupimm);
42705 transmute(src:r)
42706 }
42707}
42708
42709/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42710///
42711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
42712#[inline]
42713#[target_feature(enable = "avx512f")]
42714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42715#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42716#[rustc_legacy_const_generics(3)]
42717pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
42718 unsafe {
42719 static_assert_uimm_bits!(IMM8, 8);
42720 let a: Simd = a.as_f64x2();
42721 let b: Simd = b.as_f64x2();
42722 let c: Simd = c.as_i64x2();
42723 let fixupimm: Simd = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
42724 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42725 let r: Simd = simd_insert!(a, 0, fixupimm);
42726 transmute(src:r)
42727 }
42728}
42729
42730/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42731///
42732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
42733#[inline]
42734#[target_feature(enable = "avx512f")]
42735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42736#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42737#[rustc_legacy_const_generics(4)]
42738pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
42739 a: __m128d,
42740 k: __mmask8,
42741 b: __m128d,
42742 c: __m128i,
42743) -> __m128d {
42744 unsafe {
42745 static_assert_uimm_bits!(IMM8, 8);
42746 let a: Simd = a.as_f64x2();
42747 let b: Simd = b.as_f64x2();
42748 let c: Simd = c.as_i64x2();
42749 let fixupimm: Simd = vfixupimmsd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42750 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42751 let r: Simd = simd_insert!(a, 0, fixupimm);
42752 transmute(src:r)
42753 }
42754}
42755
42756/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42757///
42758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
42759#[inline]
42760#[target_feature(enable = "avx512f")]
42761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42762#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42763#[rustc_legacy_const_generics(4)]
42764pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
42765 k: __mmask8,
42766 a: __m128d,
42767 b: __m128d,
42768 c: __m128i,
42769) -> __m128d {
42770 unsafe {
42771 static_assert_uimm_bits!(IMM8, 8);
42772 let a: Simd = a.as_f64x2();
42773 let b: Simd = b.as_f64x2();
42774 let c: Simd = c.as_i64x2();
42775 let fixupimm: Simd = vfixupimmsdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
42776 let fixupimm: f64 = simd_extract!(fixupimm, 0);
42777 let r: Simd = simd_insert!(a, 0, fixupimm);
42778 transmute(src:r)
42779 }
42780}
42781
42782/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42784///
42785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
42786#[inline]
42787#[target_feature(enable = "avx512f")]
42788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42789#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42790#[rustc_legacy_const_generics(3, 4)]
42791pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42792 a: __m128,
42793 b: __m128,
42794 c: __m128i,
42795) -> __m128 {
42796 unsafe {
42797 static_assert_uimm_bits!(IMM8, 8);
42798 static_assert_mantissas_sae!(SAE);
42799 let a: Simd = a.as_f32x4();
42800 let b: Simd = b.as_f32x4();
42801 let c: Simd = c.as_i32x4();
42802 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:0b11111111, SAE);
42803 let fixupimm: f32 = simd_extract!(r, 0);
42804 let r: Simd = simd_insert!(a, 0, fixupimm);
42805 transmute(src:r)
42806 }
42807}
42808
42809/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42810/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42811///
42812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
42813#[inline]
42814#[target_feature(enable = "avx512f")]
42815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42816#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42817#[rustc_legacy_const_generics(4, 5)]
42818pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42819 a: __m128,
42820 k: __mmask8,
42821 b: __m128,
42822 c: __m128i,
42823) -> __m128 {
42824 unsafe {
42825 static_assert_uimm_bits!(IMM8, 8);
42826 static_assert_mantissas_sae!(SAE);
42827 let a: Simd = a.as_f32x4();
42828 let b: Simd = b.as_f32x4();
42829 let c: Simd = c.as_i32x4();
42830 let r: Simd = vfixupimmss(a, b, c, IMM8, mask:k, SAE);
42831 let fixupimm: f32 = simd_extract!(r, 0);
42832 let r: Simd = simd_insert!(a, 0, fixupimm);
42833 transmute(src:r)
42834 }
42835}
42836
42837/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42838/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42839///
42840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
42841#[inline]
42842#[target_feature(enable = "avx512f")]
42843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42844#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42845#[rustc_legacy_const_generics(4, 5)]
42846pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42847 k: __mmask8,
42848 a: __m128,
42849 b: __m128,
42850 c: __m128i,
42851) -> __m128 {
42852 unsafe {
42853 static_assert_uimm_bits!(IMM8, 8);
42854 static_assert_mantissas_sae!(SAE);
42855 let a: Simd = a.as_f32x4();
42856 let b: Simd = b.as_f32x4();
42857 let c: Simd = c.as_i32x4();
42858 let r: Simd = vfixupimmssz(a, b, c, IMM8, mask:k, SAE);
42859 let fixupimm: f32 = simd_extract!(r, 0);
42860 let r: Simd = simd_insert!(a, 0, fixupimm);
42861 transmute(src:r)
42862 }
42863}
42864
42865/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42866/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42867///
42868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
42869#[inline]
42870#[target_feature(enable = "avx512f")]
42871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42873#[rustc_legacy_const_generics(3, 4)]
42874pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42875 a: __m128d,
42876 b: __m128d,
42877 c: __m128i,
42878) -> __m128d {
42879 unsafe {
42880 static_assert_uimm_bits!(IMM8, 8);
42881 static_assert_mantissas_sae!(SAE);
42882 let a: Simd = a.as_f64x2();
42883 let b: Simd = b.as_f64x2();
42884 let c: Simd = c.as_i64x2();
42885 let r: Simd = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, SAE);
42886 let fixupimm: f64 = simd_extract!(r, 0);
42887 let r: Simd = simd_insert!(a, 0, fixupimm);
42888 transmute(src:r)
42889 }
42890}
42891
42892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42893/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42894///
42895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
42896#[inline]
42897#[target_feature(enable = "avx512f")]
42898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42899#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42900#[rustc_legacy_const_generics(4, 5)]
42901pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42902 a: __m128d,
42903 k: __mmask8,
42904 b: __m128d,
42905 c: __m128i,
42906) -> __m128d {
42907 unsafe {
42908 static_assert_uimm_bits!(IMM8, 8);
42909 static_assert_mantissas_sae!(SAE);
42910 let a: Simd = a.as_f64x2();
42911 let b: Simd = b.as_f64x2();
42912 let c: Simd = c.as_i64x2();
42913 let r: Simd = vfixupimmsd(a, b, c, IMM8, mask:k, SAE);
42914 let fixupimm: f64 = simd_extract!(r, 0);
42915 let r: Simd = simd_insert!(a, 0, fixupimm);
42916 transmute(src:r)
42917 }
42918}
42919
42920/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42921/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42922///
42923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
42924#[inline]
42925#[target_feature(enable = "avx512f")]
42926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42927#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42928#[rustc_legacy_const_generics(4, 5)]
42929pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42930 k: __mmask8,
42931 a: __m128d,
42932 b: __m128d,
42933 c: __m128i,
42934) -> __m128d {
42935 unsafe {
42936 static_assert_uimm_bits!(IMM8, 8);
42937 static_assert_mantissas_sae!(SAE);
42938 let a: Simd = a.as_f64x2();
42939 let b: Simd = b.as_f64x2();
42940 let c: Simd = c.as_i64x2();
42941 let r: Simd = vfixupimmsdz(a, b, c, IMM8, mask:k, SAE);
42942 let fixupimm: f64 = simd_extract!(r, 0);
42943 let r: Simd = simd_insert!(a, 0, fixupimm);
42944 transmute(src:r)
42945 }
42946}
42947
42948/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42949///
42950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
42951#[inline]
42952#[target_feature(enable = "avx512f")]
42953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42954#[cfg_attr(test, assert_instr(vcvtss2sd))]
42955pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42956 unsafe {
42957 transmute(src:vcvtss2sd(
42958 a.as_f64x2(),
42959 b.as_f32x4(),
42960 src.as_f64x2(),
42961 mask:k,
42962 _MM_FROUND_CUR_DIRECTION,
42963 ))
42964 }
42965}
42966
42967/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42968///
42969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
42970#[inline]
42971#[target_feature(enable = "avx512f")]
42972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42973#[cfg_attr(test, assert_instr(vcvtss2sd))]
42974pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42975 unsafe {
42976 transmute(src:vcvtss2sd(
42977 a.as_f64x2(),
42978 b.as_f32x4(),
42979 src:f64x2::ZERO,
42980 mask:k,
42981 _MM_FROUND_CUR_DIRECTION,
42982 ))
42983 }
42984}
42985
42986/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
42987///
42988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
42989#[inline]
42990#[target_feature(enable = "avx512f")]
42991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42992#[cfg_attr(test, assert_instr(vcvtsd2ss))]
42993pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
42994 unsafe {
42995 transmute(src:vcvtsd2ss(
42996 a.as_f32x4(),
42997 b.as_f64x2(),
42998 src.as_f32x4(),
42999 mask:k,
43000 _MM_FROUND_CUR_DIRECTION,
43001 ))
43002 }
43003}
43004
43005/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
43006///
43007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
43008#[inline]
43009#[target_feature(enable = "avx512f")]
43010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43011#[cfg_attr(test, assert_instr(vcvtsd2ss))]
43012pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43013 unsafe {
43014 transmute(src:vcvtsd2ss(
43015 a.as_f32x4(),
43016 b.as_f64x2(),
43017 src:f32x4::ZERO,
43018 mask:k,
43019 _MM_FROUND_CUR_DIRECTION,
43020 ))
43021 }
43022}
43023
43024/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
43025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43026///
43027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
43028#[inline]
43029#[target_feature(enable = "avx512f")]
43030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43031#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43032#[rustc_legacy_const_generics(2)]
43033pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
43034 unsafe {
43035 static_assert_sae!(SAE);
43036 let a: Simd = a.as_f64x2();
43037 let b: Simd = b.as_f32x4();
43038 let r: Simd = vcvtss2sd(a, b, src:f64x2::ZERO, mask:0b11111111, SAE);
43039 transmute(src:r)
43040 }
43041}
43042
43043/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43044/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43045///
43046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
43047#[inline]
43048#[target_feature(enable = "avx512f")]
43049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43050#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43051#[rustc_legacy_const_generics(4)]
43052pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
43053 src: __m128d,
43054 k: __mmask8,
43055 a: __m128d,
43056 b: __m128,
43057) -> __m128d {
43058 unsafe {
43059 static_assert_sae!(SAE);
43060 let a: Simd = a.as_f64x2();
43061 let b: Simd = b.as_f32x4();
43062 let src: Simd = src.as_f64x2();
43063 let r: Simd = vcvtss2sd(a, b, src, mask:k, SAE);
43064 transmute(src:r)
43065 }
43066}
43067
43068/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43070///
43071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
43072#[inline]
43073#[target_feature(enable = "avx512f")]
43074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43075#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43076#[rustc_legacy_const_generics(3)]
43077pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
43078 unsafe {
43079 static_assert_sae!(SAE);
43080 let a: Simd = a.as_f64x2();
43081 let b: Simd = b.as_f32x4();
43082 let r: Simd = vcvtss2sd(a, b, src:f64x2::ZERO, mask:k, SAE);
43083 transmute(src:r)
43084 }
43085}
43086
43087/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43088/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43089/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43090/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43091/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43092/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43093/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43094///
43095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
43096#[inline]
43097#[target_feature(enable = "avx512f")]
43098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43099#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43100#[rustc_legacy_const_generics(2)]
43101pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
43102 unsafe {
43103 static_assert_rounding!(ROUNDING);
43104 let a: Simd = a.as_f32x4();
43105 let b: Simd = b.as_f64x2();
43106 let r: Simd = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
43107 transmute(src:r)
43108 }
43109}
43110
43111/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43112/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43113/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43114/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43115/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43116/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43117/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43118///
43119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
43120#[inline]
43121#[target_feature(enable = "avx512f")]
43122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43123#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43124#[rustc_legacy_const_generics(4)]
43125pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
43126 src: __m128,
43127 k: __mmask8,
43128 a: __m128,
43129 b: __m128d,
43130) -> __m128 {
43131 unsafe {
43132 static_assert_rounding!(ROUNDING);
43133 let a: Simd = a.as_f32x4();
43134 let b: Simd = b.as_f64x2();
43135 let src: Simd = src.as_f32x4();
43136 let r: Simd = vcvtsd2ss(a, b, src, mask:k, ROUNDING);
43137 transmute(src:r)
43138 }
43139}
43140
43141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43142/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43143/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43144/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43145/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43146/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43147/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43148///
43149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
43150#[inline]
43151#[target_feature(enable = "avx512f")]
43152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43153#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43154#[rustc_legacy_const_generics(3)]
43155pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43156 unsafe {
43157 static_assert_rounding!(ROUNDING);
43158 let a: Simd = a.as_f32x4();
43159 let b: Simd = b.as_f64x2();
43160 let r: Simd = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:k, ROUNDING);
43161 transmute(src:r)
43162 }
43163}
43164
43165/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43166/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43172///
43173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
43174#[inline]
43175#[target_feature(enable = "avx512f")]
43176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43177#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43178#[rustc_legacy_const_generics(1)]
43179pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
43180 unsafe {
43181 static_assert_rounding!(ROUNDING);
43182 let a: Simd = a.as_f32x4();
43183 vcvtss2si(a, ROUNDING)
43184 }
43185}
43186
43187/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43188/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43189/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43190/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43191/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43192/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43193/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43194///
43195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
43196#[inline]
43197#[target_feature(enable = "avx512f")]
43198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43199#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43200#[rustc_legacy_const_generics(1)]
43201pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
43202 unsafe {
43203 static_assert_rounding!(ROUNDING);
43204 let a: Simd = a.as_f32x4();
43205 vcvtss2si(a, ROUNDING)
43206 }
43207}
43208
43209/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43210/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43211/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43212/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43213/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43214/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43215/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43216///
43217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
43218#[inline]
43219#[target_feature(enable = "avx512f")]
43220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43221#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
43222#[rustc_legacy_const_generics(1)]
43223pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
43224 unsafe {
43225 static_assert_rounding!(ROUNDING);
43226 let a: Simd = a.as_f32x4();
43227 vcvtss2usi(a, ROUNDING)
43228 }
43229}
43230
43231/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43232///
43233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
43234#[inline]
43235#[target_feature(enable = "avx512f")]
43236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43237#[cfg_attr(test, assert_instr(vcvtss2si))]
43238pub fn _mm_cvtss_i32(a: __m128) -> i32 {
43239 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43240}
43241
43242/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43243///
43244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
43245#[inline]
43246#[target_feature(enable = "avx512f")]
43247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43248#[cfg_attr(test, assert_instr(vcvtss2usi))]
43249pub fn _mm_cvtss_u32(a: __m128) -> u32 {
43250 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43251}
43252
43253/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43254/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43255/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43256/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43257/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43258/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43259/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43260///
43261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
43262#[inline]
43263#[target_feature(enable = "avx512f")]
43264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43265#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43266#[rustc_legacy_const_generics(1)]
43267pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
43268 unsafe {
43269 static_assert_rounding!(ROUNDING);
43270 let a: Simd = a.as_f64x2();
43271 vcvtsd2si(a, ROUNDING)
43272 }
43273}
43274
43275/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43276/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43277/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43278/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43279/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43280/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43281/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43282///
43283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
43284#[inline]
43285#[target_feature(enable = "avx512f")]
43286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43287#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43288#[rustc_legacy_const_generics(1)]
43289pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
43290 unsafe {
43291 static_assert_rounding!(ROUNDING);
43292 let a: Simd = a.as_f64x2();
43293 vcvtsd2si(a, ROUNDING)
43294 }
43295}
43296
43297/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43304///
43305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
43306#[inline]
43307#[target_feature(enable = "avx512f")]
43308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43309#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
43310#[rustc_legacy_const_generics(1)]
43311pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
43312 unsafe {
43313 static_assert_rounding!(ROUNDING);
43314 let a: Simd = a.as_f64x2();
43315 vcvtsd2usi(a, ROUNDING)
43316 }
43317}
43318
43319/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43320///
43321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
43322#[inline]
43323#[target_feature(enable = "avx512f")]
43324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43325#[cfg_attr(test, assert_instr(vcvtsd2si))]
43326pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
43327 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43328}
43329
43330/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43331///
43332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
43333#[inline]
43334#[target_feature(enable = "avx512f")]
43335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43336#[cfg_attr(test, assert_instr(vcvtsd2usi))]
43337pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
43338 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43339}
43340
43341/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43342///
43343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43349///
43350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
43351#[inline]
43352#[target_feature(enable = "avx512f")]
43353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43354#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
43355#[rustc_legacy_const_generics(2)]
43356pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
43357 unsafe {
43358 static_assert_rounding!(ROUNDING);
43359 let a: Simd = a.as_f32x4();
43360 let r: Simd = vcvtsi2ss(a, b, ROUNDING);
43361 transmute(src:r)
43362 }
43363}
43364
43365/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43366///
43367/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43368/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43369/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43370/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43371/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43372/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43373///
43374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
43375#[inline]
43376#[target_feature(enable = "avx512f")]
43377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43378#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
43379#[rustc_legacy_const_generics(2)]
43380pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
43381 unsafe {
43382 static_assert_rounding!(ROUNDING);
43383 let a: Simd = a.as_f32x4();
43384 let r: Simd = vcvtsi2ss(a, b, ROUNDING);
43385 transmute(src:r)
43386 }
43387}
43388
43389/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43396///
43397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
43398#[inline]
43399#[target_feature(enable = "avx512f")]
43400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43401#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
43402#[rustc_legacy_const_generics(2)]
43403pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
43404 unsafe {
43405 static_assert_rounding!(ROUNDING);
43406 let a: Simd = a.as_f32x4();
43407 let r: Simd = vcvtusi2ss(a, b, ROUNDING);
43408 transmute(src:r)
43409 }
43410}
43411
43412/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43413///
43414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
43415#[inline]
43416#[target_feature(enable = "avx512f")]
43417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43418#[cfg_attr(test, assert_instr(vcvtsi2ss))]
43419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43420pub const fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
43421 unsafe {
43422 let b: f32 = b as f32;
43423 simd_insert!(a, 0, b)
43424 }
43425}
43426
43427/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43428///
43429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
43430#[inline]
43431#[target_feature(enable = "avx512f")]
43432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43433#[cfg_attr(test, assert_instr(vcvtsi2sd))]
43434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43435pub const fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
43436 unsafe {
43437 let b: f64 = b as f64;
43438 simd_insert!(a, 0, b)
43439 }
43440}
43441
43442/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43443/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43444///
43445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
43446#[inline]
43447#[target_feature(enable = "avx512f")]
43448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43449#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
43450#[rustc_legacy_const_generics(1)]
43451pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
43452 unsafe {
43453 static_assert_sae!(SAE);
43454 let a: Simd = a.as_f32x4();
43455 vcvttss2si(a, SAE)
43456 }
43457}
43458
43459/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43460/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43461///
43462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
43463#[inline]
43464#[target_feature(enable = "avx512f")]
43465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43466#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
43467#[rustc_legacy_const_generics(1)]
43468pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
43469 unsafe {
43470 static_assert_sae!(SAE);
43471 let a: Simd = a.as_f32x4();
43472 vcvttss2si(a, SAE)
43473 }
43474}
43475
43476/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
43477/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43478///
43479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
43480#[inline]
43481#[target_feature(enable = "avx512f")]
43482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43483#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
43484#[rustc_legacy_const_generics(1)]
43485pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
43486 unsafe {
43487 static_assert_sae!(SAE);
43488 let a: Simd = a.as_f32x4();
43489 vcvttss2usi(a, SAE)
43490 }
43491}
43492
43493/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43494///
43495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
43496#[inline]
43497#[target_feature(enable = "avx512f")]
43498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43499#[cfg_attr(test, assert_instr(vcvttss2si))]
43500pub fn _mm_cvttss_i32(a: __m128) -> i32 {
43501 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43502}
43503
43504/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43505///
43506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
43507#[inline]
43508#[target_feature(enable = "avx512f")]
43509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43510#[cfg_attr(test, assert_instr(vcvttss2usi))]
43511pub fn _mm_cvttss_u32(a: __m128) -> u32 {
43512 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43513}
43514
43515/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43516/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43517///
43518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
43519#[inline]
43520#[target_feature(enable = "avx512f")]
43521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43522#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
43523#[rustc_legacy_const_generics(1)]
43524pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
43525 unsafe {
43526 static_assert_sae!(SAE);
43527 let a: Simd = a.as_f64x2();
43528 vcvttsd2si(a, SAE)
43529 }
43530}
43531
43532/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
43533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43534///
43535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
43536#[inline]
43537#[target_feature(enable = "avx512f")]
43538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43539#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
43540#[rustc_legacy_const_generics(1)]
43541pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
43542 unsafe {
43543 static_assert_sae!(SAE);
43544 let a: Simd = a.as_f64x2();
43545 vcvttsd2si(a, SAE)
43546 }
43547}
43548
43549/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
43550/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43551///
43552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
43553#[inline]
43554#[target_feature(enable = "avx512f")]
43555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43556#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
43557#[rustc_legacy_const_generics(1)]
43558pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
43559 unsafe {
43560 static_assert_sae!(SAE);
43561 let a: Simd = a.as_f64x2();
43562 vcvttsd2usi(a, SAE)
43563 }
43564}
43565
43566/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
43567///
43568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
43569#[inline]
43570#[target_feature(enable = "avx512f")]
43571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43572#[cfg_attr(test, assert_instr(vcvttsd2si))]
43573pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
43574 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43575}
43576
43577/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
43578///
43579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
43580#[inline]
43581#[target_feature(enable = "avx512f")]
43582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43583#[cfg_attr(test, assert_instr(vcvttsd2usi))]
43584pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
43585 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43586}
43587
43588/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
43589///
43590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
43591#[inline]
43592#[target_feature(enable = "avx512f")]
43593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43594#[cfg_attr(test, assert_instr(vcvtusi2ss))]
43595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43596pub const fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
43597 unsafe {
43598 let b: f32 = b as f32;
43599 simd_insert!(a, 0, b)
43600 }
43601}
43602
43603/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
43604///
43605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
43606#[inline]
43607#[target_feature(enable = "avx512f")]
43608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43609#[cfg_attr(test, assert_instr(vcvtusi2sd))]
43610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
43611pub const fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
43612 unsafe {
43613 let b: f64 = b as f64;
43614 simd_insert!(a, 0, b)
43615 }
43616}
43617
43618/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
43619/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43620///
43621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
43622#[inline]
43623#[target_feature(enable = "avx512f")]
43624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43625#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
43626#[rustc_legacy_const_generics(2, 3)]
43627pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
43628 unsafe {
43629 static_assert_uimm_bits!(IMM5, 5);
43630 static_assert_mantissas_sae!(SAE);
43631 let a: Simd = a.as_f32x4();
43632 let b: Simd = b.as_f32x4();
43633 vcomiss(a, b, IMM5, SAE)
43634 }
43635}
43636
43637/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
43638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43639///
43640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
43641#[inline]
43642#[target_feature(enable = "avx512f")]
43643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43644#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
43645#[rustc_legacy_const_generics(2, 3)]
43646pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
43647 unsafe {
43648 static_assert_uimm_bits!(IMM5, 5);
43649 static_assert_mantissas_sae!(SAE);
43650 let a: Simd = a.as_f64x2();
43651 let b: Simd = b.as_f64x2();
43652 vcomisd(a, b, IMM5, SAE)
43653 }
43654}
43655
// Integer-comparison predicates for the masked `_mm*_cmp_ep*_mask`
// intrinsics: a 3-bit encoding (0x00..=0x07) passed as the immediate operand.

/// Equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
/// Less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False (always 0)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
/// Not-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
/// Not less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
/// Not less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True (always 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
43680
// Normalization-interval selectors for the `getmant` family of intrinsics:
// they choose the interval into which the extracted mantissa is normalized.

/// interval [1, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
/// interval [0.5, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
/// interval [0.5, 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
/// interval [0.75, 1.5)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
43693
// Sign-control selectors for the `getmant` family of intrinsics: they choose
// how the sign of the extracted mantissa is derived from the source.

/// sign = sign(SRC)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
/// sign = 0
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
/// DEST = NaN if sign(SRC) = 1
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
43703
// Shuffle-control constants for the `shuffle_epi32` family. Each constant
// packs four 2-bit lane selectors (A = 0, B = 1, C = 2, D = 3); the first
// letter of the name occupies the most significant pair of bits, e.g.
// `_MM_PERM_ABCD` = 0b00_01_10_11 = 0x1B.
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
43782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43783pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
43784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43785pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
43786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43787pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
43788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43789pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
43790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43791pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
43792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43793pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
43794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43795pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
43796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43797pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
43798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43799pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
43800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43801pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
43802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43803pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
43804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43805pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
43806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43807pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
43808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43809pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
43810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43811pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
43812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43813pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
43814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43815pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
43816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43817pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
43818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43819pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
43820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43821pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
43822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43823pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
43824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43825pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
43826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43827pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
43828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43829pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
43830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43831pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
43832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43833pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
43834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43835pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
43836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43837pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
43838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43839pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
43840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43841pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
43842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43843pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
43844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43845pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
43846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43847pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
43848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43849pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
43850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43851pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
43852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43853pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
43854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43855pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
43856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43857pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
43858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43859pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
43860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43861pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
43862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43863pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
43864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43865pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
43866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43867pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
43868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43869pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
43870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43871pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
43872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43873pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
43874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43875pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
43876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43877pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
43878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43879pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
43880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43881pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
43882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43883pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
43884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43885pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
43886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43887pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
43888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43889pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
43890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43891pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
43892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43893pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
43894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43895pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
43896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43897pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
43898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43899pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
43900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43901pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
43902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43903pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
43904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43905pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
43906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43907pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
43908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43909pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
43910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43911pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
43912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43913pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
43914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43915pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
43916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43917pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
43918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43919pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
43920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43921pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
43922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43923pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
43924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43925pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
43926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43927pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
43928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43929pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
43930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43931pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
43932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43933pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
43934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43935pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
43936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43937pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
43938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43939pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
43940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43941pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
43942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43943pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
43944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43945pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
43946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43947pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
43948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43949pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
43950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43951pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
43952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43953pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
43954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43955pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
43956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43957pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
43958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43959pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
43960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43961pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
43962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43963pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
43964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43965pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
43966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43967pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
43968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43969pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
43970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43971pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
43972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43973pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
43974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43975pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
43976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43977pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
43978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43979pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
43980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43981pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
43982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43983pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
43984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43985pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
43986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43987pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
43988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43989pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
43990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43991pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
43992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43993pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
43994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43995pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
43996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43997pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
43998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43999pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
44000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44001pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
44002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44003pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
44004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44005pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
44006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44007pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
44008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44009pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
44010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44011pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
44012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44013pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
44014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44015pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
44016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44017pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
44018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44019pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
44020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44021pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
44022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44023pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
44024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44025pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
44026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44027pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
44028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44029pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
44030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44031pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
44032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44033pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
44034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44035pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
44036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44037pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
44038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44039pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
44040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44041pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
44042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44043pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
44044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44045pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
44046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44047pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
44048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44049pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
44050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44051pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
44052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44053pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
44054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44055pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
44056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44057pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
44058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44059pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
44060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44061pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
44062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44063pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
44064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44065pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
44066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44067pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
44068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44069pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
44070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44071pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
44072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44073pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
44074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44075pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
44076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44077pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
44078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44079pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
44080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44081pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
44082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44083pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
44084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44085pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
44086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44087pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
44088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44089pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
44090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44091pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
44092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44093pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
44094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44095pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
44096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44097pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
44098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44099pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
44100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44101pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
44102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44103pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
44104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44105pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
44106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44107pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
44108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44109pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
44110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44111pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
44112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44113pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
44114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44115pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
44116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44117pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
44118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44119pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
44120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44121pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
44122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44123pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
44124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44125pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
44126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44127pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
44128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44129pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
44130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44131pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
44132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44133pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
44134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44135pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
44136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44137pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
44138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44139pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
44140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44141pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
44142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44143pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
44144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44145pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
44146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44147pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
44148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44149pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
44150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44151pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
44152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44153pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
44154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44155pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
44156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44157pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
44158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44159pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
44160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44161pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
44162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44163pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
44164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44165pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
44166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44167pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
44168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44169pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
44170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44171pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
44172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44173pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
44174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44175pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
44176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44177pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
44178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44179pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
44180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44181pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
44182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44183pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
44184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44185pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
44186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44187pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
44188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44189pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
44190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44191pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
44192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44193pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
44194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44195pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
44196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44197pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
44198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44199pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
44200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44201pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
44202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44203pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
44204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44205pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
44206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44207pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
44208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44209pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
44210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44211pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
44212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44213pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
44214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
44215pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
44216
44217#[allow(improper_ctypes)]
44218unsafe extern "C" {
44219 #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
44220 unsafefn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
44221 #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
44222 unsafefn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
44223
44224 #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
44225 unsafefn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
44226 #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
44227 unsafefn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
44228
44229 #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
44230 unsafefn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
44231 #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
44232 unsafefn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
44233
44234 #[link_name = "llvm.x86.avx512.add.ps.512"]
44235 unsafefn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44236 #[link_name = "llvm.x86.avx512.add.pd.512"]
44237 unsafefn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44238 #[link_name = "llvm.x86.avx512.sub.ps.512"]
44239 unsafefn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44240 #[link_name = "llvm.x86.avx512.sub.pd.512"]
44241 unsafefn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44242 #[link_name = "llvm.x86.avx512.mul.ps.512"]
44243 unsafefn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44244 #[link_name = "llvm.x86.avx512.mul.pd.512"]
44245 unsafefn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44246 #[link_name = "llvm.x86.avx512.div.ps.512"]
44247 unsafefn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44248 #[link_name = "llvm.x86.avx512.div.pd.512"]
44249 unsafefn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44250
44251 #[link_name = "llvm.x86.avx512.max.ps.512"]
44252 unsafefn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44253 #[link_name = "llvm.x86.avx512.max.pd.512"]
44254 unsafefn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44255 #[link_name = "llvm.x86.avx512.min.ps.512"]
44256 unsafefn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44257 #[link_name = "llvm.x86.avx512.min.pd.512"]
44258 unsafefn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44259
44260 #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
44261 unsafefn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
44262
44263 #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
44264 unsafefn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44265 #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
44266 unsafefn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44267
44268 #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
44269 unsafefn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
44270 #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
44271 unsafefn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44272 #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
44273 unsafefn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44274
44275 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
44276 unsafefn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
44277 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
44278 unsafefn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
44279 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
44280 unsafefn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
44281
44282 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
44283 unsafefn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
44284 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
44285 unsafefn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
44286 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
44287 unsafefn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
44288
44289 #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
44290 unsafefn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
44291 #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
44292 unsafefn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
44293 #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
44294 unsafefn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44295
44296 #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
44297 unsafefn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
44298 #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
44299 unsafefn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
44300 #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
44301 unsafefn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44302
44303 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
44304 unsafefn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44305 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
44306 unsafefn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44307 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
44308 unsafefn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44309
44310 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
44311 unsafefn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44312 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
44313 unsafefn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44314 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
44315 unsafefn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44316
44317 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
44318 unsafefn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44319 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
44320 unsafefn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44321 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
44322 unsafefn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44323
44324 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
44325 unsafefn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44326 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
44327 unsafefn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44328 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
44329 unsafefn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44330
44331 #[link_name = "llvm.x86.avx512.pternlog.d.512"]
44332 unsafefn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
44333 #[link_name = "llvm.x86.avx512.pternlog.d.256"]
44334 unsafefn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
44335 #[link_name = "llvm.x86.avx512.pternlog.d.128"]
44336 unsafefn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
44337
44338 #[link_name = "llvm.x86.avx512.pternlog.q.512"]
44339 unsafefn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
44340 #[link_name = "llvm.x86.avx512.pternlog.q.256"]
44341 unsafefn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
44342 #[link_name = "llvm.x86.avx512.pternlog.q.128"]
44343 unsafefn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
44344
44345 #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
44346 unsafefn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
44347 #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
44348 unsafefn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
44349 #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
44350 unsafefn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
44351
44352 #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
44353 unsafefn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
44354 #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
44355 unsafefn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
44356 #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
44357 unsafefn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
44358
44359 #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
44360 unsafefn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44361 #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
44362 unsafefn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44363 #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
44364 unsafefn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44365
44366 #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
44367 unsafefn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44368 #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
44369 unsafefn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44370 #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
44371 unsafefn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44372
44373 #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
44374 unsafefn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44375 #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
44376 unsafefn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44377 #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
44378 unsafefn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44379
44380 #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
44381 unsafefn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44382 #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
44383 unsafefn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44384 #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
44385 unsafefn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44386
44387 #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
44388 unsafefn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44389
44390 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
44391 unsafefn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44392 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
44393 unsafefn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44394 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
44395 unsafefn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44396
44397 #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
44398 unsafefn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
44399 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
44400 unsafefn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
44401 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
44402 unsafefn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
44403
44404 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
44405 unsafefn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
44406 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
44407 unsafefn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44408
44409 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
44410 unsafefn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
44411 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
44412 unsafefn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
44413 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
44414 unsafefn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
44415
44416 #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
44417 unsafefn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
44418 #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
44419 unsafefn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
44420
44421 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
44422 unsafefn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
44423 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
44424 unsafefn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44425 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
44426 unsafefn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
44427
44428 #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
44429 unsafefn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
44430
44431 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
44432 unsafefn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
44433 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
44434 unsafefn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
44435 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
44436 unsafefn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
44437
44438 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
44439 unsafefn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
44440 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
44441 unsafefn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
44442 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
44443 unsafefn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
44444
44445 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
44446 unsafefn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
44447 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
44448 unsafefn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
44449 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
44450 unsafefn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
44451
44452 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
44453 unsafefn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
44454 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
44455 unsafefn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
44456 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
44457 unsafefn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
44458
44459 #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
44460 unsafefn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44461 #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
44462 unsafefn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44463 #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
44464 unsafefn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44465
44466 #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
44467 unsafefn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44468 #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
44469 unsafefn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44470 #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
44471 unsafefn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44472 #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
44473 unsafefn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44474 #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
44475 unsafefn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44476
44477 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
44478 unsafefn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44479 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
44480 unsafefn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44481 #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
44482 unsafefn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44483
44484 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
44485 unsafefn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44486 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
44487 unsafefn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44488 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
44489 unsafefn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44490
44491 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
44492 unsafefn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44493 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
44494 unsafefn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44495 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
44496 unsafefn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44497
44498 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
44499 unsafefn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44500 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
44501 unsafefn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44502 #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
44503 unsafefn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44504
44505 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
44506 unsafefn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44507 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
44508 unsafefn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44509 #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
44510 unsafefn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44511
44512 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
44513 unsafefn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
44514 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
44515 unsafefn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
44516 #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
44517 unsafefn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
44518
44519 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
44520 unsafefn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44521 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
44522 unsafefn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44523 #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
44524 unsafefn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44525
44526 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
44527 unsafefn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44528 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
44529 unsafefn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44530 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
44531 unsafefn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44532
44533 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
44534 unsafefn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44535 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
44536 unsafefn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44537 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
44538 unsafefn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44539
44540 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
44541 unsafefn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44542 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
44543 unsafefn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44544 #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
44545 unsafefn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44546
44547 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
44548 unsafefn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44549 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
44550 unsafefn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44551 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
44552 unsafefn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44553
44554 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
44555 unsafefn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44556 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
44557 unsafefn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44558 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
44559 unsafefn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44560
44561 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
44562 unsafefn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44563 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
44564 unsafefn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44565 #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
44566 unsafefn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44567
44568 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
44569 unsafefn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44570 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
44571 unsafefn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44572 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
44573 unsafefn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44574
44575 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
44576 unsafefn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
44577 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
44578 unsafefn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
44579 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
44580 unsafefn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
44581
44582 #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
44583 unsafefn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44584
44585 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
44586 unsafefn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
44587 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
44588 unsafefn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
44589 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
44590 unsafefn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
44591
44592 #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
44593 unsafefn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
44594 #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
44595 unsafefn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
44596 #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
44597 unsafefn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
44598
44599 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
44600 unsafefn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
44601 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
44602 unsafefn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
44603 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
44604 unsafefn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
44605
44606 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
44607 unsafefn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
44608 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
44609 unsafefn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
44610 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
44611 unsafefn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
44612
44613 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
44614 unsafefn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
44615 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
44616 unsafefn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
44617 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
44618 unsafefn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
44619
44620 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
44621 unsafefn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
44622 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
44623 unsafefn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
44624 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
44625 unsafefn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
44626
44627 #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
44628 unsafefn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
44629 #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
44630 unsafefn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
44631 #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
44632 unsafefn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
44633
44634 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
44635 unsafefn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
44636 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
44637 unsafefn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
44638 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
44639 unsafefn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
44640
44641 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
44642 unsafefn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
44643 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
44644 unsafefn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
44645 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
44646 unsafefn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
44647
44648 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
44649 unsafefn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
44650 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
44651 unsafefn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
44652 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
44653 unsafefn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
44654
44655 #[link_name = "llvm.x86.avx512.gather.dpd.512"]
44656 unsafefn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
44657 #[link_name = "llvm.x86.avx512.gather.dps.512"]
44658 unsafefn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
44659 #[link_name = "llvm.x86.avx512.gather.qpd.512"]
44660 unsafefn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
44661 #[link_name = "llvm.x86.avx512.gather.qps.512"]
44662 unsafefn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
44663 #[link_name = "llvm.x86.avx512.gather.dpq.512"]
44664 unsafefn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
44665 #[link_name = "llvm.x86.avx512.gather.dpi.512"]
44666 unsafefn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
44667 #[link_name = "llvm.x86.avx512.gather.qpq.512"]
44668 unsafefn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
44669 #[link_name = "llvm.x86.avx512.gather.qpi.512"]
44670 unsafefn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
44671
44672 #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
44673 unsafefn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
44674 #[link_name = "llvm.x86.avx512.scatter.dps.512"]
44675 unsafefn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
44676 #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
44677 unsafefn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
44678 #[link_name = "llvm.x86.avx512.scatter.qps.512"]
44679 unsafefn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
44680 #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
44681 unsafefn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
44682
44683 #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
44684 unsafefn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
44685 #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
44686 unsafefn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
44687 #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
44688 unsafefn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
44689
44690 #[link_name = "llvm.x86.avx512.scattersiv4.si"]
44691 unsafefn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
44692 #[link_name = "llvm.x86.avx512.scattersiv2.di"]
44693 unsafefn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
44694 #[link_name = "llvm.x86.avx512.scattersiv2.df"]
44695 unsafefn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
44696 #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
44697 unsafefn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
44698 #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
44699 unsafefn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
44700 #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
44701 unsafefn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
44702 #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
44703 unsafefn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
44704 #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
44705 unsafefn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
44706
44707 #[link_name = "llvm.x86.avx512.scattersiv8.si"]
44708 unsafefn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
44709 #[link_name = "llvm.x86.avx512.scattersiv4.di"]
44710 unsafefn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
44711 #[link_name = "llvm.x86.avx512.scattersiv4.df"]
44712 unsafefn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
44713 #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
44714 unsafefn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
44715 #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
44716 unsafefn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
44717 #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
44718 unsafefn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
44719 #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
44720 unsafefn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
44721 #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
44722 unsafefn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
44723
44724 #[link_name = "llvm.x86.avx512.gather3siv4.si"]
44725 unsafefn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
44726 #[link_name = "llvm.x86.avx512.gather3siv2.di"]
44727 unsafefn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
44728 #[link_name = "llvm.x86.avx512.gather3siv2.df"]
44729 unsafefn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
44730 #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
44731 unsafefn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
44732 #[link_name = "llvm.x86.avx512.gather3div4.si"]
44733 unsafefn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
44734 #[link_name = "llvm.x86.avx512.gather3div2.di"]
44735 unsafefn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
44736 #[link_name = "llvm.x86.avx512.gather3div2.df"]
44737 unsafefn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
44738 #[link_name = "llvm.x86.avx512.gather3div4.sf"]
44739 unsafefn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
44740
44741 #[link_name = "llvm.x86.avx512.gather3siv8.si"]
44742 unsafefn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
44743 #[link_name = "llvm.x86.avx512.gather3siv4.di"]
44744 unsafefn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
44745 #[link_name = "llvm.x86.avx512.gather3siv4.df"]
44746 unsafefn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
44747 #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
44748 unsafefn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
44749 #[link_name = "llvm.x86.avx512.gather3div8.si"]
44750 unsafefn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
44751 #[link_name = "llvm.x86.avx512.gather3div4.di"]
44752 unsafefn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
44753 #[link_name = "llvm.x86.avx512.gather3div4.df"]
44754 unsafefn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
44755 #[link_name = "llvm.x86.avx512.gather3div8.sf"]
44756 unsafefn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
44757
44758 #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
44759 unsafefn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
44760 #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
44761 unsafefn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
44762
44763 #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
44764 unsafefn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
44765 #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
44766 unsafefn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
44767 #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
44768 unsafefn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
44769
44770 #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
44771 unsafefn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
44772 #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
44773 unsafefn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
44774 #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
44775 unsafefn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
44776
44777 #[link_name = "llvm.x86.avx512.psll.d.512"]
44778 unsafefn vpslld(a: i32x16, count: i32x4) -> i32x16;
44779 #[link_name = "llvm.x86.avx512.psrl.d.512"]
44780 unsafefn vpsrld(a: i32x16, count: i32x4) -> i32x16;
44781 #[link_name = "llvm.x86.avx512.psll.q.512"]
44782 unsafefn vpsllq(a: i64x8, count: i64x2) -> i64x8;
44783 #[link_name = "llvm.x86.avx512.psrl.q.512"]
44784 unsafefn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
44785
44786 #[link_name = "llvm.x86.avx512.psra.d.512"]
44787 unsafefn vpsrad(a: i32x16, count: i32x4) -> i32x16;
44788
44789 #[link_name = "llvm.x86.avx512.psra.q.512"]
44790 unsafefn vpsraq(a: i64x8, count: i64x2) -> i64x8;
44791 #[link_name = "llvm.x86.avx512.psra.q.256"]
44792 unsafefn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
44793 #[link_name = "llvm.x86.avx512.psra.q.128"]
44794 unsafefn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
44795
44796 #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
44797 unsafefn vpermilps(a: f32x16, b: i32x16) -> f32x16;
44798 #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
44799 unsafefn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
44800
44801 #[link_name = "llvm.x86.avx512.permvar.si.512"]
44802 unsafefn vpermd(a: i32x16, idx: i32x16) -> i32x16;
44803
44804 #[link_name = "llvm.x86.avx512.permvar.di.512"]
44805 unsafefn vpermq(a: i64x8, idx: i64x8) -> i64x8;
44806 #[link_name = "llvm.x86.avx512.permvar.di.256"]
44807 unsafefn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
44808
44809 #[link_name = "llvm.x86.avx512.permvar.sf.512"]
44810 unsafefn vpermps(a: f32x16, idx: i32x16) -> f32x16;
44811
44812 #[link_name = "llvm.x86.avx512.permvar.df.512"]
44813 unsafefn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
44814 #[link_name = "llvm.x86.avx512.permvar.df.256"]
44815 unsafefn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
44816
44817 #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
44818 unsafefn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
44819 #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
44820 unsafefn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
44821 #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
44822 unsafefn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
44823
44824 #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
44825 unsafefn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
44826 #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
44827 unsafefn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
44828 #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
44829 unsafefn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
44830
44831 #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
44832 unsafefn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
44833 #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
44834 unsafefn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
44835 #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
44836 unsafefn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
44837
44838 #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
44839 unsafefn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
44840 #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
44841 unsafefn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
44842 #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
44843 unsafefn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
44844
44845 #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
44846 unsafefn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44847 #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
44848 unsafefn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44849 #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
44850 unsafefn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44851
44852 #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
44853 unsafefn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44854 #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
44855 unsafefn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44856 #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
44857 unsafefn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44858
44859 #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
44860 unsafefn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44861 #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
44862 unsafefn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44863 #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
44864 unsafefn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44865
44866 #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
44867 unsafefn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44868 #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
44869 unsafefn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44870 #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
44871 unsafefn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44872
44873 #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
44874 unsafefn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
44875 #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
44876 unsafefn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
44877 #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
44878 unsafefn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
44879
44880 #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
44881 unsafefn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
44882 #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
44883 unsafefn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
44884 #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
44885 unsafefn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
44886
44887 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
44888 unsafefn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
44889 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
44890 unsafefn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
44891 #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
44892 unsafefn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
44893
44894 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
44895 unsafefn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
44896 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
44897 unsafefn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
44898 #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
44899 unsafefn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
44900
44901 #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
44902 unsafefn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
44903 #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
44904 unsafefn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
44905 #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
44906 unsafefn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
44907
44908 #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
44909 unsafefn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
44910 #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
44911 unsafefn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
44912 #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
44913 unsafefn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
44914
44915 #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
44916 unsafefn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
44917 #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
44918 unsafefn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
44919 #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
44920 unsafefn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
44921
44922 #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
44923 unsafefn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
44924 #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
44925 unsafefn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
44926 #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
44927 unsafefn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
44928
44929 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
44930 unsafefn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44931 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
44932 unsafefn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44933 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
44934 unsafefn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44935 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
44936 unsafefn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44937 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
44938 unsafefn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44939 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
44940 unsafefn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44941 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
44942 unsafefn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44943 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
44944 unsafefn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44945 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
44946 unsafefn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44947 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
44948 unsafefn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44949 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
44950 unsafefn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44951 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
44952 unsafefn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44953 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
44954 unsafefn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
44955 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
44956 unsafefn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
44957 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
44958 unsafefn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
44959 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
44960 unsafefn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
44961 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
44962 unsafefn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
44963 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
44964 unsafefn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
44965
44966 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
44967 unsafefn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44968 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
44969 unsafefn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44970 #[link_name = "llvm.x86.avx512.rcp14.ss"]
44971 unsafefn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44972 #[link_name = "llvm.x86.avx512.rcp14.sd"]
44973 unsafefn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44974
44975 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
44976 unsafefn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
44977 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
44978 unsafefn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
44979 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
44980 unsafefn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
44981 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
44982 unsafefn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
44983
44984 #[link_name = "llvm.x86.avx512.vfmadd.f32"]
44985 unsafefn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
44986 #[link_name = "llvm.x86.avx512.vfmadd.f64"]
44987 unsafefn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
44988
44989 #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
44990 unsafefn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44991 #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
44992 unsafefn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44993 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
44994 unsafefn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
44995 #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
44996 unsafefn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
44997
44998 #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
44999 unsafefn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
45000 #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
45001 unsafefn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
45002
45003 #[link_name = "llvm.x86.avx512.vcvtss2si32"]
45004 unsafefn vcvtss2si(a: f32x4, rounding: i32) -> i32;
45005 #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
45006 unsafefn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
45007
45008 #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
45009 unsafefn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
45010 #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
45011 unsafefn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
45012
45013 #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
45014 unsafefn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
45015
45016 #[link_name = "llvm.x86.avx512.cvtusi2ss"]
45017 unsafefn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
45018
45019 #[link_name = "llvm.x86.avx512.cvttss2si"]
45020 unsafefn vcvttss2si(a: f32x4, rounding: i32) -> i32;
45021 #[link_name = "llvm.x86.avx512.cvttss2usi"]
45022 unsafefn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
45023
45024 #[link_name = "llvm.x86.avx512.cvttsd2si"]
45025 unsafefn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
45026 #[link_name = "llvm.x86.avx512.cvttsd2usi"]
45027 unsafefn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
45028
45029 #[link_name = "llvm.x86.avx512.vcomi.ss"]
45030 unsafefn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
45031 #[link_name = "llvm.x86.avx512.vcomi.sd"]
45032 unsafefn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
45033
45034 #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
45035 unsafefn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
45036 #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
45037 unsafefn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
45038 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
45039 unsafefn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
45040 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
45041 unsafefn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
45042 #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
45043 unsafefn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
45044 #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
45045 unsafefn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
45046 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
45047 unsafefn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
45048 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
45049 unsafefn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
45050 #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
45051 unsafefn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
45052 #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
45053 unsafefn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
45054 #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
45055 unsafefn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
45056 #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
45057 unsafefn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
45058
45059}
45060
45061#[cfg(test)]
45062mod tests {
45063 use crate::core_arch::assert_eq_const as assert_eq;
45064
45065 use stdarch_test::simd_test;
45066
45067 use crate::core_arch::x86::*;
45068 use crate::hint::black_box;
45069 use crate::mem::{self};
45070
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_abs_epi32() {
        // Lane values cover zero, +/-1, both i32 extremes, and ordinary
        // magnitudes; the 8-value pattern repeats to fill all 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_abs_epi32(a);
        // abs(i32::MIN) wraps back to i32::MIN, spelled here as
        // i32::MAX.wrapping_add(1) to make the wrapping explicit.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m512i(r, e);
    }
45090
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_abs_epi32() {
        // Same lane pattern as the unmasked abs test: zero, +/-1, extremes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_abs_epi32(a, 0, a);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 keep `src`.
        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45112
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_abs_epi32() {
        // Same lane pattern as the unmasked abs test: zero, +/-1, extremes.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_abs_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 are zeroed.
        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45134
45135 #[simd_test(enable = "avx512f,avx512vl")]
45136 const fn test_mm256_mask_abs_epi32() {
45137 #[rustfmt::skip]
45138 let a = _mm256_setr_epi32(
45139 0, 1, -1, i32::MAX,
45140 i32::MIN, 100, -100, -32,
45141 );
45142 let r = _mm256_mask_abs_epi32(a, 0, a);
45143 assert_eq_m256i(r, a);
45144 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
45145 #[rustfmt::skip]
45146 let e = _mm256_setr_epi32(
45147 0, 1, 1, i32::MAX,
45148 i32::MAX.wrapping_add(1), 100, -100, -32,
45149 );
45150 assert_eq_m256i(r, e);
45151 }
45152
45153 #[simd_test(enable = "avx512f,avx512vl")]
45154 const fn test_mm256_maskz_abs_epi32() {
45155 #[rustfmt::skip]
45156 let a = _mm256_setr_epi32(
45157 0, 1, -1, i32::MAX,
45158 i32::MIN, 100, -100, -32,
45159 );
45160 let r = _mm256_maskz_abs_epi32(0, a);
45161 assert_eq_m256i(r, _mm256_setzero_si256());
45162 let r = _mm256_maskz_abs_epi32(0b00001111, a);
45163 #[rustfmt::skip]
45164 let e = _mm256_setr_epi32(
45165 0, 1, 1, i32::MAX,
45166 0, 0, 0, 0,
45167 );
45168 assert_eq_m256i(r, e);
45169 }
45170
45171 #[simd_test(enable = "avx512f,avx512vl")]
45172 const fn test_mm_mask_abs_epi32() {
45173 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45174 let r = _mm_mask_abs_epi32(a, 0, a);
45175 assert_eq_m128i(r, a);
45176 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
45177 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45178 assert_eq_m128i(r, e);
45179 }
45180
45181 #[simd_test(enable = "avx512f,avx512vl")]
45182 const fn test_mm_maskz_abs_epi32() {
45183 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45184 let r = _mm_maskz_abs_epi32(0, a);
45185 assert_eq_m128i(r, _mm_setzero_si128());
45186 let r = _mm_maskz_abs_epi32(0b00001111, a);
45187 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45188 assert_eq_m128i(r, e);
45189 }
45190
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_abs_ps() {
        // Zero, +/-1, the finite f32 extremes, and ordinary magnitudes,
        // repeated to fill all 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let r = _mm512_abs_ps(a);
        // f32::MIN is -f32::MAX, so its absolute value is exactly f32::MAX.
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
        );
        assert_eq_m512(r, e);
    }
45210
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_abs_ps() {
        // Same lane pattern as the unmasked abs_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_abs_ps(a, 0, a);
        assert_eq_m512(r, a);
        // Low eight mask bits set: lanes 0..8 get |a|, lanes 8..16 keep `src`.
        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
45232
45233 #[simd_test(enable = "avx512f")]
45234 const fn test_mm512_mask_mov_epi32() {
45235 let src = _mm512_set1_epi32(1);
45236 let a = _mm512_set1_epi32(2);
45237 let r = _mm512_mask_mov_epi32(src, 0, a);
45238 assert_eq_m512i(r, src);
45239 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
45240 assert_eq_m512i(r, a);
45241 }
45242
45243 #[simd_test(enable = "avx512f")]
45244 const fn test_mm512_maskz_mov_epi32() {
45245 let a = _mm512_set1_epi32(2);
45246 let r = _mm512_maskz_mov_epi32(0, a);
45247 assert_eq_m512i(r, _mm512_setzero_si512());
45248 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
45249 assert_eq_m512i(r, a);
45250 }
45251
45252 #[simd_test(enable = "avx512f,avx512vl")]
45253 const fn test_mm256_mask_mov_epi32() {
45254 let src = _mm256_set1_epi32(1);
45255 let a = _mm256_set1_epi32(2);
45256 let r = _mm256_mask_mov_epi32(src, 0, a);
45257 assert_eq_m256i(r, src);
45258 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
45259 assert_eq_m256i(r, a);
45260 }
45261
45262 #[simd_test(enable = "avx512f,avx512vl")]
45263 const fn test_mm256_maskz_mov_epi32() {
45264 let a = _mm256_set1_epi32(2);
45265 let r = _mm256_maskz_mov_epi32(0, a);
45266 assert_eq_m256i(r, _mm256_setzero_si256());
45267 let r = _mm256_maskz_mov_epi32(0b11111111, a);
45268 assert_eq_m256i(r, a);
45269 }
45270
45271 #[simd_test(enable = "avx512f,avx512vl")]
45272 const fn test_mm_mask_mov_epi32() {
45273 let src = _mm_set1_epi32(1);
45274 let a = _mm_set1_epi32(2);
45275 let r = _mm_mask_mov_epi32(src, 0, a);
45276 assert_eq_m128i(r, src);
45277 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
45278 assert_eq_m128i(r, a);
45279 }
45280
45281 #[simd_test(enable = "avx512f,avx512vl")]
45282 const fn test_mm_maskz_mov_epi32() {
45283 let a = _mm_set1_epi32(2);
45284 let r = _mm_maskz_mov_epi32(0, a);
45285 assert_eq_m128i(r, _mm_setzero_si128());
45286 let r = _mm_maskz_mov_epi32(0b00001111, a);
45287 assert_eq_m128i(r, a);
45288 }
45289
45290 #[simd_test(enable = "avx512f")]
45291 const fn test_mm512_mask_mov_ps() {
45292 let src = _mm512_set1_ps(1.);
45293 let a = _mm512_set1_ps(2.);
45294 let r = _mm512_mask_mov_ps(src, 0, a);
45295 assert_eq_m512(r, src);
45296 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
45297 assert_eq_m512(r, a);
45298 }
45299
45300 #[simd_test(enable = "avx512f")]
45301 const fn test_mm512_maskz_mov_ps() {
45302 let a = _mm512_set1_ps(2.);
45303 let r = _mm512_maskz_mov_ps(0, a);
45304 assert_eq_m512(r, _mm512_setzero_ps());
45305 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
45306 assert_eq_m512(r, a);
45307 }
45308
45309 #[simd_test(enable = "avx512f,avx512vl")]
45310 const fn test_mm256_mask_mov_ps() {
45311 let src = _mm256_set1_ps(1.);
45312 let a = _mm256_set1_ps(2.);
45313 let r = _mm256_mask_mov_ps(src, 0, a);
45314 assert_eq_m256(r, src);
45315 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
45316 assert_eq_m256(r, a);
45317 }
45318
45319 #[simd_test(enable = "avx512f,avx512vl")]
45320 const fn test_mm256_maskz_mov_ps() {
45321 let a = _mm256_set1_ps(2.);
45322 let r = _mm256_maskz_mov_ps(0, a);
45323 assert_eq_m256(r, _mm256_setzero_ps());
45324 let r = _mm256_maskz_mov_ps(0b11111111, a);
45325 assert_eq_m256(r, a);
45326 }
45327
45328 #[simd_test(enable = "avx512f,avx512vl")]
45329 const fn test_mm_mask_mov_ps() {
45330 let src = _mm_set1_ps(1.);
45331 let a = _mm_set1_ps(2.);
45332 let r = _mm_mask_mov_ps(src, 0, a);
45333 assert_eq_m128(r, src);
45334 let r = _mm_mask_mov_ps(src, 0b00001111, a);
45335 assert_eq_m128(r, a);
45336 }
45337
45338 #[simd_test(enable = "avx512f,avx512vl")]
45339 const fn test_mm_maskz_mov_ps() {
45340 let a = _mm_set1_ps(2.);
45341 let r = _mm_maskz_mov_ps(0, a);
45342 assert_eq_m128(r, _mm_setzero_ps());
45343 let r = _mm_maskz_mov_ps(0b00001111, a);
45344 assert_eq_m128(r, a);
45345 }
45346
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_add_epi32() {
        // Includes both i32 extremes so lane-wise wrapping on overflow is
        // exercised alongside ordinary additions.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_add_epi32(a, b);
        // i32::MAX + 1 wraps to i32::MIN; all other lanes add normally.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
        );
        assert_eq_m512i(r, e);
    }
45367
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_add_epi32() {
        // Same lane pattern as the unmasked add test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_add_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 hold a + 1, lanes 8..16 keep `src`.
        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45390
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_add_epi32() {
        // Same lane pattern as the unmasked add test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_add_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 hold a + 1, lanes 8..16 are zeroed.
        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45413
45414 #[simd_test(enable = "avx512f,avx512vl")]
45415 const fn test_mm256_mask_add_epi32() {
45416 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45417 let b = _mm256_set1_epi32(1);
45418 let r = _mm256_mask_add_epi32(a, 0, a, b);
45419 assert_eq_m256i(r, a);
45420 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
45421 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45422 assert_eq_m256i(r, e);
45423 }
45424
45425 #[simd_test(enable = "avx512f,avx512vl")]
45426 const fn test_mm256_maskz_add_epi32() {
45427 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45428 let b = _mm256_set1_epi32(1);
45429 let r = _mm256_maskz_add_epi32(0, a, b);
45430 assert_eq_m256i(r, _mm256_setzero_si256());
45431 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
45432 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45433 assert_eq_m256i(r, e);
45434 }
45435
45436 #[simd_test(enable = "avx512f,avx512vl")]
45437 const fn test_mm_mask_add_epi32() {
45438 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45439 let b = _mm_set1_epi32(1);
45440 let r = _mm_mask_add_epi32(a, 0, a, b);
45441 assert_eq_m128i(r, a);
45442 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
45443 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
45444 assert_eq_m128i(r, e);
45445 }
45446
45447 #[simd_test(enable = "avx512f,avx512vl")]
45448 const fn test_mm_maskz_add_epi32() {
45449 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
45450 let b = _mm_set1_epi32(1);
45451 let r = _mm_maskz_add_epi32(0, a, b);
45452 assert_eq_m128i(r, _mm_setzero_si128());
45453 let r = _mm_maskz_add_epi32(0b00001111, a, b);
45454 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
45455 assert_eq_m128i(r, e);
45456 }
45457
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_add_ps() {
        // Zero, +/-1, the finite f32 extremes, and ordinary magnitudes.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_add_ps(a, b);
        // Adding 1.0 to f32::MAX rounds back to f32::MAX (1.0 is far below
        // its ULP); likewise `f32::MIN + 1.` evaluates to f32::MIN.
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
        );
        assert_eq_m512(r, e);
    }
45478
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_add_ps() {
        // Same lane pattern as the unmasked add_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_add_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        // Low eight mask bits set: lanes 0..8 hold a + 1.0, lanes 8..16 keep `src`.
        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
45501
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_add_ps() {
        // Same lane pattern as the unmasked add_ps test.
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_add_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low eight mask bits set: lanes 0..8 hold a + 1.0, lanes 8..16 are zeroed.
        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
45524
45525 #[simd_test(enable = "avx512f,avx512vl")]
45526 const fn test_mm256_mask_add_ps() {
45527 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45528 let b = _mm256_set1_ps(1.);
45529 let r = _mm256_mask_add_ps(a, 0, a, b);
45530 assert_eq_m256(r, a);
45531 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
45532 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45533 assert_eq_m256(r, e);
45534 }
45535
45536 #[simd_test(enable = "avx512f,avx512vl")]
45537 const fn test_mm256_maskz_add_ps() {
45538 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
45539 let b = _mm256_set1_ps(1.);
45540 let r = _mm256_maskz_add_ps(0, a, b);
45541 assert_eq_m256(r, _mm256_setzero_ps());
45542 let r = _mm256_maskz_add_ps(0b11111111, a, b);
45543 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
45544 assert_eq_m256(r, e);
45545 }
45546
45547 #[simd_test(enable = "avx512f,avx512vl")]
45548 const fn test_mm_mask_add_ps() {
45549 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45550 let b = _mm_set1_ps(1.);
45551 let r = _mm_mask_add_ps(a, 0, a, b);
45552 assert_eq_m128(r, a);
45553 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
45554 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45555 assert_eq_m128(r, e);
45556 }
45557
45558 #[simd_test(enable = "avx512f,avx512vl")]
45559 const fn test_mm_maskz_add_ps() {
45560 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
45561 let b = _mm_set1_ps(1.);
45562 let r = _mm_maskz_add_ps(0, a, b);
45563 assert_eq_m128(r, _mm_setzero_ps());
45564 let r = _mm_maskz_add_ps(0b00001111, a, b);
45565 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
45566 assert_eq_m128(r, e);
45567 }
45568
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_epi32() {
        // Includes both i32 extremes so lane-wise wrapping on underflow is
        // exercised alongside ordinary subtractions.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
        // i32::MIN - 1 wraps to i32::MAX; all other lanes subtract normally.
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }
45589
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_epi32() {
        // Same lane pattern as the unmasked sub test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero writemask: every lane is copied from `src` (first argument).
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low eight mask bits set: lanes 0..8 hold a - 1, lanes 8..16 keep `src`.
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
45612
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_epi32() {
        // Same lane pattern as the unmasked sub test.
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        // Zero zeromask: every lane is zeroed.
        let r = _mm512_maskz_sub_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low eight mask bits set: lanes 0..8 hold a - 1, lanes 8..16 are zeroed.
        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
45635
45636 #[simd_test(enable = "avx512f,avx512vl")]
45637 const fn test_mm256_mask_sub_epi32() {
45638 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45639 let b = _mm256_set1_epi32(1);
45640 let r = _mm256_mask_sub_epi32(a, 0, a, b);
45641 assert_eq_m256i(r, a);
45642 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
45643 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45644 assert_eq_m256i(r, e);
45645 }
45646
45647 #[simd_test(enable = "avx512f,avx512vl")]
45648 const fn test_mm256_maskz_sub_epi32() {
45649 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45650 let b = _mm256_set1_epi32(1);
45651 let r = _mm256_maskz_sub_epi32(0, a, b);
45652 assert_eq_m256i(r, _mm256_setzero_si256());
45653 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
45654 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
45655 assert_eq_m256i(r, e);
45656 }
45657
45658 #[simd_test(enable = "avx512f,avx512vl")]
45659 const fn test_mm_mask_sub_epi32() {
45660 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45661 let b = _mm_set1_epi32(1);
45662 let r = _mm_mask_sub_epi32(a, 0, a, b);
45663 assert_eq_m128i(r, a);
45664 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
45665 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45666 assert_eq_m128i(r, e);
45667 }
45668
45669 #[simd_test(enable = "avx512f,avx512vl")]
45670 const fn test_mm_maskz_sub_epi32() {
45671 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45672 let b = _mm_set1_epi32(1);
45673 let r = _mm_maskz_sub_epi32(0, a, b);
45674 assert_eq_m128i(r, _mm_setzero_si128());
45675 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
45676 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
45677 assert_eq_m128i(r, e);
45678 }
45679
    // Unmasked 512-bit float subtraction: every lane holds a - b
    // (f32::MIN - 1. stays f32::MIN since 1 is below its ULP).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 512-bit float subtraction: mask 0 returns `src` (here `a`),
    // mask with the low 8 bits set subtracts in the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked 512-bit float subtraction: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_mask_sub_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_maskz_sub_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_mask_sub_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float subtraction (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_maskz_sub_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sub_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }
45790
    // Unmasked 512-bit low-32-bit multiply: only the low 32 bits of each
    // product are kept, so i32::MAX * 2 wraps to -2 and i32::MIN * 2 to 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mullo_epi32(a, b);
        let e = _mm512_setr_epi32(
            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
        );
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set only the low 8 lanes hold products.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 2, -2, -2,
            0, 200, -200, -64,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mullo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_maskz_mullo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit low multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_maskz_mullo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }
45891
    // Unmasked 512-bit float multiply: f32::MAX * 2 overflows to +inf and
    // f32::MIN * 2 to -inf.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mul_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200.,
            -64.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); low 8 mask bits
    // select products for the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_mul_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mul_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_mask_mul_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_maskz_mul_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_mask_mul_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float multiply (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_maskz_mul_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_mul_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }
46011
    // Unmasked 512-bit float division: nonzero / 0 yields a signed infinity.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_div_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0.5, -0.5, 500.,
            f32::NEG_INFINITY, 50., -50., -16.,
        );
        assert_eq_m512(r, e); // 0/0 = NAN
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); low 8 mask bits
    // select quotients for the low 8 lanes only.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_mask_div_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 1., -1., 1000.,
            -131., 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_maskz_div_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_mask_div_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_maskz_div_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_div_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_mask_div_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float division (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_maskz_div_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_div_ps(0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }
46116
    // Unmasked 512-bit signed max: each lane keeps the larger operand.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit signed max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }
46191
    // Unmasked 512-bit float max (not `const fn`, unlike the integer tests).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_ps(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_max_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_max_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_max_ps(0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_max_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }
46273
46274 #[simd_test(enable = "avx512f,avx512vl")]
46275 fn test_mm_maskz_max_ps() {
46276 let a = _mm_set_ps(0., 1., 2., 3.);
46277 let b = _mm_set_ps(3., 2., 1., 0.);
46278 let r = _mm_maskz_max_ps(0, a, b);
46279 assert_eq_m128(r, _mm_setzero_ps());
46280 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
46281 let e = _mm_set_ps(3., 2., 2., 3.);
46282 assert_eq_m128(r, e);
46283 }
46284
    // Unmasked 512-bit unsigned max; all inputs here are non-negative, so the
    // expected lanes match the signed-max test above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the high lanes copy `a`, which here equals max(a, b).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epu32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit unsigned max (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epu32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }
46359
    // Unmasked 512-bit signed min: each lane keeps the smaller operand.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Write-masked 128-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_min_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    // Zero-masked 128-bit signed min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_min_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }
46434
    // Unmasked 512-bit float min (not `const fn`, unlike the integer tests).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_ps(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    // Write-masked 256-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_min_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    // Zero-masked 256-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_min_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_min_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    // Write-masked 128-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_min_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }

    // Zero-masked 128-bit float min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_min_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_min_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }
46527
    // Unmasked 512-bit unsigned min; all inputs here are non-negative, so the
    // expected lanes match the signed-min test above.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked variant: mask 0 returns `src` (here `a`); with the low 8
    // mask bits set, the result equals `a` since min(a, b) == a in those lanes
    // and the high lanes copy `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    // Zero-masked variant: masked-off high 8 lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Write-masked 256-bit unsigned min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    // Zero-masked 256-bit unsigned min (AVX512VL form).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
46580
46581 #[simd_test(enable = "avx512f,avx512vl")]
46582 const fn test_mm_mask_min_epu32() {
46583 let a = _mm_set_epi32(0, 1, 2, 3);
46584 let b = _mm_set_epi32(3, 2, 1, 0);
46585 let r = _mm_mask_min_epu32(a, 0, a, b);
46586 assert_eq_m128i(r, a);
46587 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
46588 let e = _mm_set_epi32(0, 1, 1, 0);
46589 assert_eq_m128i(r, e);
46590 }
46591
46592 #[simd_test(enable = "avx512f,avx512vl")]
46593 const fn test_mm_maskz_min_epu32() {
46594 let a = _mm_set_epi32(0, 1, 2, 3);
46595 let b = _mm_set_epi32(3, 2, 1, 0);
46596 let r = _mm_maskz_min_epu32(0, a, b);
46597 assert_eq_m128i(r, _mm_setzero_si128());
46598 let r = _mm_maskz_min_epu32(0b00001111, a, b);
46599 let e = _mm_set_epi32(0, 1, 1, 0);
46600 assert_eq_m128i(r, e);
46601 }
46602
    // Square-root tests. Inputs are perfect squares so every expected lane is
    // exactly representable and the comparison needs no tolerance. `mask`
    // variants keep `src` lanes where the mask bit is clear; `maskz` variants
    // zero them.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_sqrt_ps(a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_mask_sqrt_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_maskz_sqrt_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_mask_sqrt_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_maskz_sqrt_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_mask_sqrt_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_maskz_sqrt_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sqrt_ps(0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
46682
46683 #[simd_test(enable = "avx512f")]
46684 const fn test_mm512_fmadd_ps() {
46685 let a = _mm512_set1_ps(1.);
46686 let b = _mm512_setr_ps(
46687 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46688 );
46689 let c = _mm512_set1_ps(1.);
46690 let r = _mm512_fmadd_ps(a, b, c);
46691 let e = _mm512_setr_ps(
46692 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
46693 );
46694 assert_eq_m512(r, e);
46695 }
46696
    // fmadd (a * b + c) writemask tests: `mask` variants keep `src` lanes,
    // `maskz` variants zero them, and `mask3` variants keep the lane from `c`
    // where the corresponding mask bit is clear.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // c == 2 so the unselected upper lanes stay at 2 in the expected vector.
        let c = _mm512_set1_ps(2.);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
46816
    // fmsub (a * b - c) tests. With a == c == 1 each computed lane is
    // b[i] - 1. Writemask semantics as elsewhere: mask keeps src, maskz
    // zeroes, mask3 keeps lanes from c.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }
46956
    // fmaddsub tests: a * b with c alternately subtracted (even lane indices)
    // and added (odd lane indices), visible in the alternating -1/+1 offsets
    // of the expected vectors. Writemask semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmaddsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }
47092
    // fmsubadd tests: the mirror of fmaddsub — a * b with c alternately added
    // (even lane indices) and subtracted (odd lane indices). Writemask
    // semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsubadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }
47232
    // fnmadd tests: -(a * b) + c. With a == c == 1 each computed lane is
    // 1 - b[i]. Writemask semantics as elsewhere.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        // Upper lanes of c differ (2.) so pass-through from c is observable.
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }
47368
47369 #[simd_test(enable = "avx512f")]
47370 const fn test_mm512_fnmsub_ps() {
47371 let a = _mm512_set1_ps(1.);
47372 let b = _mm512_setr_ps(
47373 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47374 );
47375 let c = _mm512_set1_ps(1.);
47376 let r = _mm512_fnmsub_ps(a, b, c);
47377 let e = _mm512_setr_ps(
47378 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
47379 );
47380 assert_eq_m512(r, e);
47381 }
47382
47383 #[simd_test(enable = "avx512f")]
47384 const fn test_mm512_mask_fnmsub_ps() {
47385 let a = _mm512_set1_ps(1.);
47386 let b = _mm512_setr_ps(
47387 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47388 );
47389 let c = _mm512_set1_ps(1.);
47390 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
47391 assert_eq_m512(r, a);
47392 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
47393 let e = _mm512_setr_ps(
47394 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
47395 );
47396 assert_eq_m512(r, e);
47397 }
47398
47399 #[simd_test(enable = "avx512f")]
47400 const fn test_mm512_maskz_fnmsub_ps() {
47401 let a = _mm512_set1_ps(1.);
47402 let b = _mm512_setr_ps(
47403 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47404 );
47405 let c = _mm512_set1_ps(1.);
47406 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
47407 assert_eq_m512(r, _mm512_setzero_ps());
47408 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
47409 let e = _mm512_setr_ps(
47410 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47411 );
47412 assert_eq_m512(r, e);
47413 }
47414
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmsub_ps() {
        // mask3 variant: unselected lanes keep `c`. `c` differs between the
        // low (1.) and high (2.) halves so the copy-from-c path is observable.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
47432
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmsub_ps() {
        // _mm256_set_ps lists elements from lane 7 down to lane 0; mask 0
        // keeps `a`, the all-ones mask yields -(a * b) - c = -b - 1 everywhere.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47444
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmsub_ps() {
        // Zeromask variant: mask 0 zeroes every lane; the all-ones mask
        // computes -(a * b) - c = -b - 1 in every lane.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47456
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmsub_ps() {
        // mask3 variant: mask 0 keeps `c`; the all-ones mask computes
        // -(a * b) - c = -b - 1 in every lane.
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }
47468
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmsub_ps() {
        // 128-bit writemask variant: mask 0 keeps `a`; all-ones computes
        // -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47480
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmsub_ps() {
        // 128-bit zeromask variant: mask 0 zeroes every lane; all-ones
        // computes -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47492
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmsub_ps() {
        // 128-bit mask3 variant: mask 0 keeps `c`; all-ones computes
        // -(a * b) - c = -b - 1 in all four lanes.
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }
47504
47505 #[simd_test(enable = "avx512f")]
47506 fn test_mm512_rcp14_ps() {
47507 let a = _mm512_set1_ps(3.);
47508 let r = _mm512_rcp14_ps(a);
47509 let e = _mm512_set1_ps(0.33333206);
47510 assert_eq_m512(r, e);
47511 }
47512
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rcp14_ps() {
        // 0.33333206 is vrcp14ps' approximation of 1/3. The high-8 writemask
        // updates lanes 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rcp14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }
47525
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rcp14_ps() {
        // Zeromask variant: the high-8 mask computes rcp14 in lanes 8..=15
        // (setr order) and zeroes lanes 0..=7.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rcp14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }
47538
47539 #[simd_test(enable = "avx512f,avx512vl")]
47540 fn test_mm256_rcp14_ps() {
47541 let a = _mm256_set1_ps(3.);
47542 let r = _mm256_rcp14_ps(a);
47543 let e = _mm256_set1_ps(0.33333206);
47544 assert_eq_m256(r, e);
47545 }
47546
47547 #[simd_test(enable = "avx512f,avx512vl")]
47548 fn test_mm256_mask_rcp14_ps() {
47549 let a = _mm256_set1_ps(3.);
47550 let r = _mm256_mask_rcp14_ps(a, 0, a);
47551 assert_eq_m256(r, a);
47552 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
47553 let e = _mm256_set1_ps(0.33333206);
47554 assert_eq_m256(r, e);
47555 }
47556
47557 #[simd_test(enable = "avx512f,avx512vl")]
47558 fn test_mm256_maskz_rcp14_ps() {
47559 let a = _mm256_set1_ps(3.);
47560 let r = _mm256_maskz_rcp14_ps(0, a);
47561 assert_eq_m256(r, _mm256_setzero_ps());
47562 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
47563 let e = _mm256_set1_ps(0.33333206);
47564 assert_eq_m256(r, e);
47565 }
47566
47567 #[simd_test(enable = "avx512f,avx512vl")]
47568 fn test_mm_rcp14_ps() {
47569 let a = _mm_set1_ps(3.);
47570 let r = _mm_rcp14_ps(a);
47571 let e = _mm_set1_ps(0.33333206);
47572 assert_eq_m128(r, e);
47573 }
47574
47575 #[simd_test(enable = "avx512f,avx512vl")]
47576 fn test_mm_mask_rcp14_ps() {
47577 let a = _mm_set1_ps(3.);
47578 let r = _mm_mask_rcp14_ps(a, 0, a);
47579 assert_eq_m128(r, a);
47580 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
47581 let e = _mm_set1_ps(0.33333206);
47582 assert_eq_m128(r, e);
47583 }
47584
47585 #[simd_test(enable = "avx512f,avx512vl")]
47586 fn test_mm_maskz_rcp14_ps() {
47587 let a = _mm_set1_ps(3.);
47588 let r = _mm_maskz_rcp14_ps(0, a);
47589 assert_eq_m128(r, _mm_setzero_ps());
47590 let r = _mm_maskz_rcp14_ps(0b00001111, a);
47591 let e = _mm_set1_ps(0.33333206);
47592 assert_eq_m128(r, e);
47593 }
47594
47595 #[simd_test(enable = "avx512f")]
47596 fn test_mm512_rsqrt14_ps() {
47597 let a = _mm512_set1_ps(3.);
47598 let r = _mm512_rsqrt14_ps(a);
47599 let e = _mm512_set1_ps(0.5773392);
47600 assert_eq_m512(r, e);
47601 }
47602
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rsqrt14_ps() {
        // 0.5773392 approximates 1/sqrt(3). The high-8 writemask updates lanes
        // 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }
47615
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rsqrt14_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get ~1/sqrt(3);
        // the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rsqrt14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }
47628
47629 #[simd_test(enable = "avx512f,avx512vl")]
47630 fn test_mm256_rsqrt14_ps() {
47631 let a = _mm256_set1_ps(3.);
47632 let r = _mm256_rsqrt14_ps(a);
47633 let e = _mm256_set1_ps(0.5773392);
47634 assert_eq_m256(r, e);
47635 }
47636
47637 #[simd_test(enable = "avx512f,avx512vl")]
47638 fn test_mm256_mask_rsqrt14_ps() {
47639 let a = _mm256_set1_ps(3.);
47640 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
47641 assert_eq_m256(r, a);
47642 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
47643 let e = _mm256_set1_ps(0.5773392);
47644 assert_eq_m256(r, e);
47645 }
47646
47647 #[simd_test(enable = "avx512f,avx512vl")]
47648 fn test_mm256_maskz_rsqrt14_ps() {
47649 let a = _mm256_set1_ps(3.);
47650 let r = _mm256_maskz_rsqrt14_ps(0, a);
47651 assert_eq_m256(r, _mm256_setzero_ps());
47652 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
47653 let e = _mm256_set1_ps(0.5773392);
47654 assert_eq_m256(r, e);
47655 }
47656
47657 #[simd_test(enable = "avx512f,avx512vl")]
47658 fn test_mm_rsqrt14_ps() {
47659 let a = _mm_set1_ps(3.);
47660 let r = _mm_rsqrt14_ps(a);
47661 let e = _mm_set1_ps(0.5773392);
47662 assert_eq_m128(r, e);
47663 }
47664
47665 #[simd_test(enable = "avx512f,avx512vl")]
47666 fn test_mm_mask_rsqrt14_ps() {
47667 let a = _mm_set1_ps(3.);
47668 let r = _mm_mask_rsqrt14_ps(a, 0, a);
47669 assert_eq_m128(r, a);
47670 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
47671 let e = _mm_set1_ps(0.5773392);
47672 assert_eq_m128(r, e);
47673 }
47674
47675 #[simd_test(enable = "avx512f,avx512vl")]
47676 fn test_mm_maskz_rsqrt14_ps() {
47677 let a = _mm_set1_ps(3.);
47678 let r = _mm_maskz_rsqrt14_ps(0, a);
47679 assert_eq_m128(r, _mm_setzero_ps());
47680 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
47681 let e = _mm_set1_ps(0.5773392);
47682 assert_eq_m128(r, e);
47683 }
47684
47685 #[simd_test(enable = "avx512f")]
47686 fn test_mm512_getexp_ps() {
47687 let a = _mm512_set1_ps(3.);
47688 let r = _mm512_getexp_ps(a);
47689 let e = _mm512_set1_ps(1.);
47690 assert_eq_m512(r, e);
47691 }
47692
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getexp_ps() {
        // getexp(3.0) == 1.0 (3.0 == 1.5 * 2^1). The high-8 writemask updates
        // lanes 8..=15 (setr order); lanes 0..=7 keep `src` (== 3.0).
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47704
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getexp_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get getexp(3.0) == 1.0;
        // the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47716
47717 #[simd_test(enable = "avx512f,avx512vl")]
47718 fn test_mm256_getexp_ps() {
47719 let a = _mm256_set1_ps(3.);
47720 let r = _mm256_getexp_ps(a);
47721 let e = _mm256_set1_ps(1.);
47722 assert_eq_m256(r, e);
47723 }
47724
47725 #[simd_test(enable = "avx512f,avx512vl")]
47726 fn test_mm256_mask_getexp_ps() {
47727 let a = _mm256_set1_ps(3.);
47728 let r = _mm256_mask_getexp_ps(a, 0, a);
47729 assert_eq_m256(r, a);
47730 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
47731 let e = _mm256_set1_ps(1.);
47732 assert_eq_m256(r, e);
47733 }
47734
47735 #[simd_test(enable = "avx512f,avx512vl")]
47736 fn test_mm256_maskz_getexp_ps() {
47737 let a = _mm256_set1_ps(3.);
47738 let r = _mm256_maskz_getexp_ps(0, a);
47739 assert_eq_m256(r, _mm256_setzero_ps());
47740 let r = _mm256_maskz_getexp_ps(0b11111111, a);
47741 let e = _mm256_set1_ps(1.);
47742 assert_eq_m256(r, e);
47743 }
47744
47745 #[simd_test(enable = "avx512f,avx512vl")]
47746 fn test_mm_getexp_ps() {
47747 let a = _mm_set1_ps(3.);
47748 let r = _mm_getexp_ps(a);
47749 let e = _mm_set1_ps(1.);
47750 assert_eq_m128(r, e);
47751 }
47752
47753 #[simd_test(enable = "avx512f,avx512vl")]
47754 fn test_mm_mask_getexp_ps() {
47755 let a = _mm_set1_ps(3.);
47756 let r = _mm_mask_getexp_ps(a, 0, a);
47757 assert_eq_m128(r, a);
47758 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
47759 let e = _mm_set1_ps(1.);
47760 assert_eq_m128(r, e);
47761 }
47762
47763 #[simd_test(enable = "avx512f,avx512vl")]
47764 fn test_mm_maskz_getexp_ps() {
47765 let a = _mm_set1_ps(3.);
47766 let r = _mm_maskz_getexp_ps(0, a);
47767 assert_eq_m128(r, _mm_setzero_ps());
47768 let r = _mm_maskz_getexp_ps(0b00001111, a);
47769 let e = _mm_set1_ps(1.);
47770 assert_eq_m128(r, e);
47771 }
47772
47773 #[simd_test(enable = "avx512f")]
47774 fn test_mm512_roundscale_ps() {
47775 let a = _mm512_set1_ps(1.1);
47776 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
47777 let e = _mm512_set1_ps(1.0);
47778 assert_eq_m512(r, e);
47779 }
47780
47781 #[simd_test(enable = "avx512f")]
47782 fn test_mm512_mask_roundscale_ps() {
47783 let a = _mm512_set1_ps(1.1);
47784 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47785 let e = _mm512_set1_ps(1.1);
47786 assert_eq_m512(r, e);
47787 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
47788 let e = _mm512_set1_ps(1.0);
47789 assert_eq_m512(r, e);
47790 }
47791
47792 #[simd_test(enable = "avx512f")]
47793 fn test_mm512_maskz_roundscale_ps() {
47794 let a = _mm512_set1_ps(1.1);
47795 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47796 assert_eq_m512(r, _mm512_setzero_ps());
47797 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
47798 let e = _mm512_set1_ps(1.0);
47799 assert_eq_m512(r, e);
47800 }
47801
47802 #[simd_test(enable = "avx512f,avx512vl")]
47803 fn test_mm256_roundscale_ps() {
47804 let a = _mm256_set1_ps(1.1);
47805 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
47806 let e = _mm256_set1_ps(1.0);
47807 assert_eq_m256(r, e);
47808 }
47809
47810 #[simd_test(enable = "avx512f,avx512vl")]
47811 fn test_mm256_mask_roundscale_ps() {
47812 let a = _mm256_set1_ps(1.1);
47813 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47814 let e = _mm256_set1_ps(1.1);
47815 assert_eq_m256(r, e);
47816 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
47817 let e = _mm256_set1_ps(1.0);
47818 assert_eq_m256(r, e);
47819 }
47820
47821 #[simd_test(enable = "avx512f,avx512vl")]
47822 fn test_mm256_maskz_roundscale_ps() {
47823 let a = _mm256_set1_ps(1.1);
47824 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47825 assert_eq_m256(r, _mm256_setzero_ps());
47826 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
47827 let e = _mm256_set1_ps(1.0);
47828 assert_eq_m256(r, e);
47829 }
47830
47831 #[simd_test(enable = "avx512f,avx512vl")]
47832 fn test_mm_roundscale_ps() {
47833 let a = _mm_set1_ps(1.1);
47834 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
47835 let e = _mm_set1_ps(1.0);
47836 assert_eq_m128(r, e);
47837 }
47838
47839 #[simd_test(enable = "avx512f,avx512vl")]
47840 fn test_mm_mask_roundscale_ps() {
47841 let a = _mm_set1_ps(1.1);
47842 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
47843 let e = _mm_set1_ps(1.1);
47844 assert_eq_m128(r, e);
47845 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
47846 let e = _mm_set1_ps(1.0);
47847 assert_eq_m128(r, e);
47848 }
47849
47850 #[simd_test(enable = "avx512f,avx512vl")]
47851 fn test_mm_maskz_roundscale_ps() {
47852 let a = _mm_set1_ps(1.1);
47853 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
47854 assert_eq_m128(r, _mm_setzero_ps());
47855 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
47856 let e = _mm_set1_ps(1.0);
47857 assert_eq_m128(r, e);
47858 }
47859
47860 #[simd_test(enable = "avx512f")]
47861 fn test_mm512_scalef_ps() {
47862 let a = _mm512_set1_ps(1.);
47863 let b = _mm512_set1_ps(3.);
47864 let r = _mm512_scalef_ps(a, b);
47865 let e = _mm512_set1_ps(8.);
47866 assert_eq_m512(r, e);
47867 }
47868
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_ps() {
        // scalef: a * 2^floor(b) == 1 * 2^3 == 8. NOTE: `e` uses _mm512_set_ps
        // (highest lane listed first), so the 8. values land in lanes 8..=15 —
        // exactly the lanes selected by the writemask; lanes 0..=7 keep `a`.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47881
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_ps() {
        // Zeromask variant: lanes 8..=15 get 1 * 2^3 == 8 (set_ps lists lane 15
        // first); the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
47894
47895 #[simd_test(enable = "avx512f,avx512vl")]
47896 fn test_mm256_scalef_ps() {
47897 let a = _mm256_set1_ps(1.);
47898 let b = _mm256_set1_ps(3.);
47899 let r = _mm256_scalef_ps(a, b);
47900 let e = _mm256_set1_ps(8.);
47901 assert_eq_m256(r, e);
47902 }
47903
47904 #[simd_test(enable = "avx512f,avx512vl")]
47905 fn test_mm256_mask_scalef_ps() {
47906 let a = _mm256_set1_ps(1.);
47907 let b = _mm256_set1_ps(3.);
47908 let r = _mm256_mask_scalef_ps(a, 0, a, b);
47909 assert_eq_m256(r, a);
47910 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
47911 let e = _mm256_set1_ps(8.);
47912 assert_eq_m256(r, e);
47913 }
47914
47915 #[simd_test(enable = "avx512f,avx512vl")]
47916 fn test_mm256_maskz_scalef_ps() {
47917 let a = _mm256_set1_ps(1.);
47918 let b = _mm256_set1_ps(3.);
47919 let r = _mm256_maskz_scalef_ps(0, a, b);
47920 assert_eq_m256(r, _mm256_setzero_ps());
47921 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
47922 let e = _mm256_set1_ps(8.);
47923 assert_eq_m256(r, e);
47924 }
47925
47926 #[simd_test(enable = "avx512f,avx512vl")]
47927 fn test_mm_scalef_ps() {
47928 let a = _mm_set1_ps(1.);
47929 let b = _mm_set1_ps(3.);
47930 let r = _mm_scalef_ps(a, b);
47931 let e = _mm_set1_ps(8.);
47932 assert_eq_m128(r, e);
47933 }
47934
47935 #[simd_test(enable = "avx512f,avx512vl")]
47936 fn test_mm_mask_scalef_ps() {
47937 let a = _mm_set1_ps(1.);
47938 let b = _mm_set1_ps(3.);
47939 let r = _mm_mask_scalef_ps(a, 0, a, b);
47940 assert_eq_m128(r, a);
47941 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
47942 let e = _mm_set1_ps(8.);
47943 assert_eq_m128(r, e);
47944 }
47945
47946 #[simd_test(enable = "avx512f,avx512vl")]
47947 fn test_mm_maskz_scalef_ps() {
47948 let a = _mm_set1_ps(1.);
47949 let b = _mm_set1_ps(3.);
47950 let r = _mm_maskz_scalef_ps(0, a, b);
47951 assert_eq_m128(r, _mm_setzero_ps());
47952 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
47953 let e = _mm_set1_ps(8.);
47954 assert_eq_m128(r, e);
47955 }
47956
47957 #[simd_test(enable = "avx512f")]
47958 fn test_mm512_fixupimm_ps() {
47959 let a = _mm512_set1_ps(f32::NAN);
47960 let b = _mm512_set1_ps(f32::MAX);
47961 let c = _mm512_set1_epi32(i32::MAX);
47962 //let r = _mm512_fixupimm_ps(a, b, c, 5);
47963 let r = _mm512_fixupimm_ps::<5>(a, b, c);
47964 let e = _mm512_set1_ps(0.0);
47965 assert_eq_m512(r, e);
47966 }
47967
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fixupimm_ps() {
        // `a` holds NaN in lanes 8..=15 (set_ps lists lane 15 first) — the same
        // lanes the writemask selects, so exactly those are fixed up to 0.0
        // while the unselected 1.0 lanes pass through untouched.
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
47985
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fixupimm_ps() {
        // Zeromask variant: the selected NaN lanes 8..=15 are fixed up to 0.0
        // and the unselected lanes are zeroed, so the whole result is zero.
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48003
48004 #[simd_test(enable = "avx512f,avx512vl")]
48005 fn test_mm256_fixupimm_ps() {
48006 let a = _mm256_set1_ps(f32::NAN);
48007 let b = _mm256_set1_ps(f32::MAX);
48008 let c = _mm256_set1_epi32(i32::MAX);
48009 let r = _mm256_fixupimm_ps::<5>(a, b, c);
48010 let e = _mm256_set1_ps(0.0);
48011 assert_eq_m256(r, e);
48012 }
48013
48014 #[simd_test(enable = "avx512f,avx512vl")]
48015 fn test_mm256_mask_fixupimm_ps() {
48016 let a = _mm256_set1_ps(f32::NAN);
48017 let b = _mm256_set1_ps(f32::MAX);
48018 let c = _mm256_set1_epi32(i32::MAX);
48019 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
48020 let e = _mm256_set1_ps(0.0);
48021 assert_eq_m256(r, e);
48022 }
48023
48024 #[simd_test(enable = "avx512f,avx512vl")]
48025 fn test_mm256_maskz_fixupimm_ps() {
48026 let a = _mm256_set1_ps(f32::NAN);
48027 let b = _mm256_set1_ps(f32::MAX);
48028 let c = _mm256_set1_epi32(i32::MAX);
48029 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
48030 let e = _mm256_set1_ps(0.0);
48031 assert_eq_m256(r, e);
48032 }
48033
48034 #[simd_test(enable = "avx512f,avx512vl")]
48035 fn test_mm_fixupimm_ps() {
48036 let a = _mm_set1_ps(f32::NAN);
48037 let b = _mm_set1_ps(f32::MAX);
48038 let c = _mm_set1_epi32(i32::MAX);
48039 let r = _mm_fixupimm_ps::<5>(a, b, c);
48040 let e = _mm_set1_ps(0.0);
48041 assert_eq_m128(r, e);
48042 }
48043
48044 #[simd_test(enable = "avx512f,avx512vl")]
48045 fn test_mm_mask_fixupimm_ps() {
48046 let a = _mm_set1_ps(f32::NAN);
48047 let b = _mm_set1_ps(f32::MAX);
48048 let c = _mm_set1_epi32(i32::MAX);
48049 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
48050 let e = _mm_set1_ps(0.0);
48051 assert_eq_m128(r, e);
48052 }
48053
48054 #[simd_test(enable = "avx512f,avx512vl")]
48055 fn test_mm_maskz_fixupimm_ps() {
48056 let a = _mm_set1_ps(f32::NAN);
48057 let b = _mm_set1_ps(f32::MAX);
48058 let c = _mm_set1_epi32(i32::MAX);
48059 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
48060 let e = _mm_set1_ps(0.0);
48061 assert_eq_m128(r, e);
48062 }
48063
    #[simd_test(enable = "avx512f")]
    fn test_mm512_ternarylogic_epi32() {
        // IMM8 is a 3-input truth table: for each bit position the output bit
        // is imm8[(a_bit << 2) | (b_bit << 1) | c_bit].
        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m512i(r, a);

        // Bitwise xor.
        let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m512i(r, e);
        assert_eq_m512i(r, _mm512_xor_si512(_mm512_xor_si512(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m512i(r, e);
    }
48085
48086 #[simd_test(enable = "avx512f")]
48087 fn test_mm512_mask_ternarylogic_epi32() {
48088 let src = _mm512_set1_epi32(1 << 2);
48089 let a = _mm512_set1_epi32(1 << 1);
48090 let b = _mm512_set1_epi32(1 << 0);
48091 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48092 assert_eq_m512i(r, src);
48093 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
48094 let e = _mm512_set1_epi32(0);
48095 assert_eq_m512i(r, e);
48096 }
48097
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        // NOTE(review): imm 9 (truth-table entries 0b000 and 0b011) appears
        // deliberate — it yields a nonzero *unmasked* result for these inputs,
        // so the assertion genuinely checks that the zeromask zeroes it.
        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // imm 8 (only entry 0b011) matches no bit pattern in these inputs,
        // so the full-mask result is all zeros.
        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }
48109
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_ternarylogic_epi32() {
        // Local shim: there is no _mm256_set4_epi32 intrinsic, so repeat the
        // four values across both 128-bit halves to mirror the 512-bit test.
        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);

        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m256i(r, a);

        // Bitwise xor.
        let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m256i(r, e);
        assert_eq_m256i(r, _mm256_xor_si256(_mm256_xor_si256(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m256i(r, e);
    }
48133
48134 #[simd_test(enable = "avx512f,avx512vl")]
48135 fn test_mm256_mask_ternarylogic_epi32() {
48136 let src = _mm256_set1_epi32(1 << 2);
48137 let a = _mm256_set1_epi32(1 << 1);
48138 let b = _mm256_set1_epi32(1 << 0);
48139 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48140 assert_eq_m256i(r, src);
48141 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
48142 let e = _mm256_set1_epi32(0);
48143 assert_eq_m256i(r, e);
48144 }
48145
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_ternarylogic_epi32() {
        let a = _mm256_set1_epi32(1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let c = _mm256_set1_epi32(1 << 0);
        // NOTE(review): imm 9 yields a nonzero unmasked result for these
        // inputs, so this call genuinely verifies the zeromask zeroes it.
        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // imm 8 matches no bit pattern in these inputs: result is all zeros.
        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }
48157
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_ternarylogic_epi32() {
        // IMM8 is a 3-input truth table: for each bit position the output bit
        // is imm8[(a_bit << 2) | (b_bit << 1) | c_bit].
        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m128i(r, a);

        // Bitwise xor.
        let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m128i(r, e);
        assert_eq_m128i(r, _mm_xor_si128(_mm_xor_si128(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m128i(r, e);
    }
48179
48180 #[simd_test(enable = "avx512f,avx512vl")]
48181 fn test_mm_mask_ternarylogic_epi32() {
48182 let src = _mm_set1_epi32(1 << 2);
48183 let a = _mm_set1_epi32(1 << 1);
48184 let b = _mm_set1_epi32(1 << 0);
48185 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
48186 assert_eq_m128i(r, src);
48187 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
48188 let e = _mm_set1_epi32(0);
48189 assert_eq_m128i(r, e);
48190 }
48191
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_ternarylogic_epi32() {
        let a = _mm_set1_epi32(1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let c = _mm_set1_epi32(1 << 0);
        // NOTE(review): imm 9 yields a nonzero unmasked result for these
        // inputs, so this call genuinely verifies the zeromask zeroes it.
        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        // imm 8 matches no bit pattern in these inputs: result is all zeros.
        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }
48203
48204 #[simd_test(enable = "avx512f")]
48205 fn test_mm512_getmant_ps() {
48206 let a = _mm512_set1_ps(10.);
48207 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48208 let e = _mm512_set1_ps(1.25);
48209 assert_eq_m512(r, e);
48210 }
48211
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_ps() {
        // 10.0 == 1.25 * 2^3; NORM_1_2 normalizes the mantissa into [1, 2).
        // The high-8 writemask updates lanes 8..=15 (setr order) only.
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
48227
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_ps() {
        // Zeromask variant: lanes 8..=15 (setr order) get the [1, 2) mantissa
        // of 10.0 (== 1.25); the unselected lanes 0..=7 are zeroed.
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
48240
48241 #[simd_test(enable = "avx512f,avx512vl")]
48242 fn test_mm256_getmant_ps() {
48243 let a = _mm256_set1_ps(10.);
48244 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48245 let e = _mm256_set1_ps(1.25);
48246 assert_eq_m256(r, e);
48247 }
48248
48249 #[simd_test(enable = "avx512f,avx512vl")]
48250 fn test_mm256_mask_getmant_ps() {
48251 let a = _mm256_set1_ps(10.);
48252 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48253 assert_eq_m256(r, a);
48254 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
48255 let e = _mm256_set1_ps(1.25);
48256 assert_eq_m256(r, e);
48257 }
48258
48259 #[simd_test(enable = "avx512f,avx512vl")]
48260 fn test_mm256_maskz_getmant_ps() {
48261 let a = _mm256_set1_ps(10.);
48262 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48263 assert_eq_m256(r, _mm256_setzero_ps());
48264 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
48265 let e = _mm256_set1_ps(1.25);
48266 assert_eq_m256(r, e);
48267 }
48268
48269 #[simd_test(enable = "avx512f,avx512vl")]
48270 fn test_mm_getmant_ps() {
48271 let a = _mm_set1_ps(10.);
48272 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48273 let e = _mm_set1_ps(1.25);
48274 assert_eq_m128(r, e);
48275 }
48276
48277 #[simd_test(enable = "avx512f,avx512vl")]
48278 fn test_mm_mask_getmant_ps() {
48279 let a = _mm_set1_ps(10.);
48280 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48281 assert_eq_m128(r, a);
48282 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
48283 let e = _mm_set1_ps(1.25);
48284 assert_eq_m128(r, e);
48285 }
48286
48287 #[simd_test(enable = "avx512f,avx512vl")]
48288 fn test_mm_maskz_getmant_ps() {
48289 let a = _mm_set1_ps(10.);
48290 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48291 assert_eq_m128(r, _mm_setzero_ps());
48292 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
48293 let e = _mm_set1_ps(1.25);
48294 assert_eq_m128(r, e);
48295 }
48296
    #[simd_test(enable = "avx512f")]
    fn test_mm512_add_round_ps() {
        // Lane 15 (0.00000007 + -1.) is the only inexact sum, so it is the
        // only lane where the rounding mode is observable: round-to-nearest
        // yields -0.99999994, round-toward-zero yields -0.9999999.
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48318
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_add_round_ps() {
        // Mask 0 keeps `a` unchanged. The high-8 writemask adds -1 to lanes
        // 8..=15 (setr order); the inexact lane 15 rounds to -0.99999994
        // under round-to-nearest.
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48342
    // Zeromask variant: mask 0 yields all zeros; with the high half selected,
    // lanes 8..15 get the rounded sum and lanes 0..7 are zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48365
    // Subtraction with explicit rounding: the inexact last lane (0.00000007 - 1)
    // distinguishes round-to-nearest (-0.99999994) from round-toward-zero (-0.9999999).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5,
            3., 4.5, 5., 6.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48387
    // Writemask sub: mask 0 passes `src` through; high-half mask blends the
    // rounded difference into lanes 8..15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48413
    // Zeromask sub: mask 0 yields zeros; high-half mask computes lanes 8..15
    // and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r =
            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            7., 8.5, 9., 10.5,
            11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }
48437
    // Multiplication by 0.1 (not exactly representable) makes most lanes inexact, so
    // the round-to-nearest and round-toward-zero expectations differ in several ulps.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.15, 0.2, 0.35,
            0.4, 0.55, 0.6, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.14999999, 0.2, 0.35,
            0.4, 0.54999995, 0.59999996, 0.75,
            0.8, 0.95, 1.0, 1.15,
            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
        );
        assert_eq_m512(r, e);
    }
48467
    // Writemask mul: mask 0 returns `src`; high-half mask writes products into
    // lanes 8..15 and keeps `src` in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }
48497
    // Zeromask mul: mask 0 yields zeros; high-half mask computes products for
    // lanes 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_mul_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5,
            4., 5.5, 6., 7.5,
            8., 9.5, 10., 11.5,
            12., 13.5, 14., 0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r =
            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0., 0., 0.,
            0., 0., 0., 0.,
            0.8, 0.95, 1.0, 1.15,
            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }
48525
    // 1/3 is inexact in binary: nearest rounds up to 0.33333334,
    // truncation rounds down to 0.3333333 — one ulp apart.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.33333334);
        assert_eq_m512(r, e);
        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(0.3333333);
        assert_eq_m512(r, e);
    }
48537
    // Writemask div: mask 0 returns `src`; high-half mask puts 1/3 in lanes
    // 8..15 and keeps `src` (1.0) in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }
48558
    // Zeromask div: mask 0 yields zeros; high-half mask computes 1/3 in lanes
    // 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r =
            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }
48577
    // sqrt(3) is irrational: nearest gives 1.7320508, round-toward-+inf the
    // next-larger representable value 1.7320509.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320508);
        assert_eq_m512(r, e);
        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set1_ps(1.7320509);
        assert_eq_m512(r, e);
    }
48588
    // Writemask sqrt: mask 0 returns `src`; high-half mask blends sqrt(3) into
    // lanes 8..15 and keeps `src` (3.0) in lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r =
            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
48606
    // Zeromask sqrt: mask 0 yields zeros; high-half mask computes sqrt(3) in
    // lanes 8..15 and zeroes lanes 0..7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r =
            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
48623
    // Fused a*b+c with one rounding at the end: 0.00000007*1 - 1 is -0.99999994
    // under nearest, -0.9999999 (toward zero) under truncation.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }
48636
    // Writemask fmadd keyed off `a` (first operand is also `src`): mask 0 returns
    // `a`; low-half mask puts a*b+c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48661
    // Zeromask fmadd: mask 0 yields zeros; low-half mask computes a*b+c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        #[rustfmt::skip]
        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48687
    // mask3 variant: `c` (third operand) doubles as `src`, so unselected lanes
    // keep `c` (-1.0); mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48712
    // Fused a*b-c: with c = 1 this matches the fmadd test's result lane-for-lane
    // (-0.99999994 nearest, -0.9999999 truncated).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }
48725
    // Writemask fmsub (src = `a`): mask 0 returns `a`; low-half mask computes
    // a*b-c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48750
    // Zeromask fmsub: mask 0 yields zeros; low-half mask computes a*b-c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48775
    // mask3 fmsub: `c` is `src`, so unselected lanes keep `c` (1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
48800
    // fmaddsub alternates per lane: even lanes get a*b - c, odd lanes a*b + c,
    // hence the alternating 1.0000001 / -0.99999994 pattern.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_setr_ps(
            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
48823
    // Writemask fmaddsub (src = `a`): mask 0 returns `a`; low-half mask applies
    // the alternating add/sub to lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48848
    // Zeromask fmaddsub: mask 0 yields zeros; low-half mask applies the
    // alternating add/sub to lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48873
    // mask3 fmaddsub: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48898
    // fmsubadd is fmaddsub with the per-lane roles swapped: even lanes get
    // a*b + c, odd lanes a*b - c, so the alternating pattern is phase-shifted.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_setr_ps(
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
        );
        assert_eq_m512(r, e);
    }
48921
    // Writemask fmsubadd (src = `a`): mask 0 returns `a`; low-half mask applies
    // the alternating sub/add to lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
48946
    // Zeromask fmsubadd: mask 0 yields zeros; low-half mask applies the
    // alternating sub/add to lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
48971
    // mask3 fmsubadd: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -1., -1., -1., -1.,
            -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
48996
    // Fused -(a*b)+c: 1 - 0.00000007 is 0.99999994 under nearest and
    // 0.9999999 under round-toward-zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }
49010
    // Writemask fnmadd (src = `a`): mask 0 returns `a`; low-half mask computes
    // -(a*b)+c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
49033
    // Zeromask fnmadd: mask 0 yields zeros; low-half mask computes -(a*b)+c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49055
    // mask3 fnmadd: `c` is `src`, so unselected lanes keep `c` (1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49077
    // Fused -(a*b)-c: with c = -1 this equals 1 - 0.00000007, matching the
    // fnmadd expectations (0.99999994 nearest, 0.9999999 truncated).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }
49091
    // Writemask fnmsub (src = `a`): mask 0 returns `a`; low-half mask computes
    // -(a*b)-c in lanes 0..7 and keeps `a` in lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b00000000_11111111,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }
49114
    // Zeromask fnmsub: mask 0 yields zeros; low-half mask computes -(a*b)-c in
    // lanes 0..7 and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
            b,
            c,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49136
    // mask3 fnmsub: `c` is `src`, so unselected lanes keep `c` (-1.0);
    // mask 0 returns `c` entirely.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask3_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            b,
            c,
            0b00000000_11111111,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }
49158
    // Lane-wise max of ascending vs descending 0..15: the result is symmetric,
    // peaking at 8/9 in the middle. Rounding mode is _MM_FROUND_CUR_DIRECTION
    // (max is exact, so the mode cannot affect the result).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49173
    // Writemask max: mask 0 returns `src` (= `a`); low-half mask takes max in
    // lanes 0..7 and copies `a` in lanes 8..15 (which already equal the max).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49190
    // Zeromask max: mask 0 yields zeros; low-half mask takes max in lanes 0..7
    // and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49207
    // Lane-wise min of ascending vs descending 0..15: symmetric result dipping
    // to 7/7 in the middle; min is exact so the rounding immediate is inert.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }
49222
    // Writemask min: mask 0 returns `src` (= `a`). With the low half selected,
    // lanes 0..7 take min(a,b) = a and lanes 8..15 copy `a`, so the whole
    // expected vector equals `a` (0..15).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49239
    // Zeromask min: mask 0 yields zeros; low-half mask takes min in lanes 0..7
    // and zeroes lanes 8..15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49256
    // getexp extracts floor(log2(|x|)) as a float: 3.0 = 1.5 * 2^1, so the
    // biased-exponent result is 1.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
49264
    // Writemask getexp: mask 0 returns `src` (3.0); high-half mask writes the
    // exponent (1.0) into lanes 8..15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49276
    // Zero mask yields all zeros; the high-half mask keeps the exponent
    // (1.0) in lanes 8-15 and zeroes lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49288
    // IMM8 = 0 means round to integer with no scaling: 1.1 rounds to 1.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49296
    // Zero mask keeps `src` (still 1.1); the all-ones mask rounds every
    // lane, producing 1.0 throughout.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
            a,
            0b11111111_11111111,
            a,
        );
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49311
    // Zero mask yields all zeros; the all-ones mask rounds every lane
    // (1.1 -> 1.0).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
49322
    // scalef computes a * 2^floor(b): 1.0 * 2^3 = 8.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }
49331
    // Zero mask keeps `src`; the high-half mask writes 1 * 2^3 = 8 into
    // lanes 8-15 only (note `e` uses `set_ps`, so lane 15 is listed first).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a,
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49351
    // Zero mask yields all zeros; the high-half mask keeps 8.0 in lanes
    // 8-15 and zeroes lanes 0-7 (`e` uses `set_ps`, lane 15 listed first).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111_00000000,
            a,
            b,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49370
    // With imm8 = 5 and the all-ones token table in `c`, NaN inputs in `a`
    // are fixed up to 0.0 in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fixupimm_round_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }
49380
    // Only the high-half lanes (which hold NaN) are selected by the mask,
    // so they are fixed up to 0.0 while the low-half 1.0 lanes are kept.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fixupimm_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
            a,
            0b11111111_00000000,
            b,
            c,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
49403
    // The high-half NaN lanes are fixed up to 0.0 and the unselected
    // low-half lanes are zeroed, so the whole result is zero.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fixupimm_round_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
            0b11111111_00000000,
            a,
            b,
            c,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
49426
    // getmant normalizes the mantissa into [1, 2): 10.0 = 1.25 * 2^3, so
    // every lane yields 1.25 (sign taken from the source).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }
49438
    // Zero mask keeps `src` (10.0); the high-half mask writes the mantissa
    // 1.25 into lanes 8-15 only.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
49458
    // Zero mask yields all zeros; the high-half mask keeps the mantissa
    // 1.25 in lanes 8-15 and zeroes lanes 0-7.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getmant_round_ps::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
49478
    // float -> i32 conversion uses round-to-nearest-even: -3.5 -> -4,
    // 9.5 -> 10, 11.5 -> 12, etc.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epi32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
49488
    // Zero mask keeps `src`; the low-half mask converts lanes 0-7 (round
    // to nearest even) and copies `src` zeros into lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49501
    // Zero mask yields all zeros; the low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49513
    // 256-bit masked variant: zero mask keeps `src`; full mask converts
    // all 8 lanes with round-to-nearest-even (9.5 -> 10, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49524
    // 256-bit zeroing variant: zero mask yields all zeros; full mask
    // converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49534
    // 128-bit masked variant: zero mask keeps `src`; full mask converts
    // all 4 lanes (13.5 -> 14, 15.5 -> 16 via round-to-nearest-even).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49545
    // 128-bit zeroing variant: zero mask yields all zeros; full mask
    // converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49555
    // float -> u32 conversion: negative (out-of-range) inputs saturate to
    // 0xFFFFFFFF, which reads back as -1 through the i32 expectation.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
49565
    // Zero mask keeps `src`; low-half mask converts lanes 0-7 (negatives
    // saturate to 0xFFFFFFFF, i.e. -1) and copies `src` into lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49578
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
49590
    // 256-bit float -> u32 with round-to-nearest-even (9.5 -> 10, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_cvtps_epu32(a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49598
    // Zero mask keeps `src`; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49609
    // Zero mask yields all zeros; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }
49619
    // 128-bit float -> u32 with round-to-nearest-even (13.5 -> 14, 15.5 -> 16).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_cvtps_epu32(a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49627
    // Zero mask keeps `src`; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49638
    // Zero mask yields all zeros; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }
49648
    // Sign-extend 16 i8 lanes to i32; all inputs are non-negative, so the
    // values carry over unchanged. `const fn`: also checked at compile time.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49656
    // Zero mask keeps `src` (-1); low-half mask sign-extends lanes 0-7
    // (values 8..15 in `set` order) and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49667
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49677
    // 256-bit variant converts the low 8 bytes of `a`; zero mask keeps
    // `src`, full mask writes all 8 sign-extended lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49688
    // Zero mask yields all zeros; full mask converts the low 8 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepi8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49698
    // 128-bit variant converts the low 4 bytes of `a` (values 12..15);
    // zero mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49709
    // Zero mask yields all zeros; full mask converts the low 4 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepi8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49719
    // Zero-extend 16 u8 lanes to i32; for these non-negative inputs the
    // result matches the signed variant.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49727
    // Zero mask keeps `src` (-1); low-half mask zero-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49738
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49748
    // 256-bit variant converts the low 8 bytes; zero mask keeps `src`,
    // full mask writes all 8 zero-extended lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49759
    // Zero mask yields all zeros; full mask converts the low 8 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49769
    // 128-bit variant converts the low 4 bytes (values 12..15); zero mask
    // keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49780
    // Zero mask yields all zeros; full mask converts the low 4 bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49790
    // Sign-extend 16 i16 lanes to i32; non-negative inputs pass through
    // unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49798
    // Zero mask keeps `src` (-1); low-half mask sign-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49809
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49819
    // 256-bit variant: zero mask keeps `src`, full mask sign-extends all
    // 8 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }
49830
    // Zero mask yields all zeros; full mask converts all 8 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }
49840
    // 128-bit variant converts the low 4 i16 lanes (values 4..7); zero
    // mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
49851
    // Zero mask yields all zeros; full mask converts the low 4 i16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_cvtepi16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
49861
    // Zero-extend 16 u16 lanes to i32; non-negative inputs pass through
    // unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49869
    // Zero mask keeps `src` (-1); low-half mask zero-extends lanes 0-7
    // and keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49880
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
49890
    // 256-bit variant: zero mask keeps `src`, full mask zero-extends all
    // 8 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49901
    // Zero mask yields all zeros; full mask converts all 8 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
49911
    // 128-bit variant converts the low 4 u16 lanes (values 12..15); zero
    // mask keeps `src`, full mask writes all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49922
    // Zero mask yields all zeros; full mask converts the low 4 u16 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
49932
    // i32 -> f32 conversion; all inputs are exactly representable.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49942
    // Zero mask keeps `src` (-1.0); low-half mask converts lanes 0-7
    // (values 8..15 in `set` order) and keeps -1.0 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49955
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
49967
    // 256-bit variant: zero mask keeps `src`, full mask converts all 8
    // lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm256_set1_ps(-1.);
        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }
49978
    // Zero mask yields all zeros; full mask converts all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepi32_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }
49988
    // 128-bit variant: zero mask keeps `src`, full mask converts all 4
    // lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set1_ps(-1.);
        let r = _mm_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
49999
    // Zero mask yields all zeros; full mask converts all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepi32_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }
50009
    // u32 -> f32 conversion; these small non-negative values match the
    // signed conversion's results.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50019
    // Zero mask keeps `src` (-1.0); low-half mask converts lanes 0-7 and
    // keeps -1.0 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50032
    // Zero mask yields all zeros; low-half mask converts lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }
50044
    // Truncate 16 i32 lanes to i16 (512 -> 256 bit); these values fit, so
    // they carry over unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50052
    // Zero mask keeps `src` (-1); low-half mask truncates lanes 0-7 and
    // keeps -1 in lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50063
    // Zero mask yields all zeros; low-half mask truncates lanes 0-7 and
    // zeroes lanes 8-15.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
50073
    // Truncate 8 i32 lanes to i16 (256 -> 128 bit); values fit, so they
    // carry over unchanged.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50081
    // Zero mask keeps `src`; full mask truncates all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50092
    // Zero mask yields all zeros; full mask truncates all 8 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50102
    // Truncate 4 i32 lanes into the low 64 bits of the result; the upper
    // four i16 lanes are zeroed.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50110
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi16() {
        // Writemask narrowing of 4 lanes; only the low 4 mask bits matter,
        // and the upper 4 result lanes are zeroed by the instruction itself.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50121
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi16() {
        // Zeromask narrowing of 4 lanes; upper result lanes are always zero.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50131
50132 #[simd_test(enable = "avx512f")]
50133 const fn test_mm512_cvtepi32_epi8() {
50134 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50135 let r = _mm512_cvtepi32_epi8(a);
50136 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50137 assert_eq_m128i(r, e);
50138 }
50139
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi8() {
        // Writemask byte narrowing: unselected lanes keep `src` (-1 here).
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
50150
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing: unselected lanes are zeroed.
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
50160
50161 #[simd_test(enable = "avx512f,avx512vl")]
50162 fn test_mm256_cvtepi32_epi8() {
50163 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
50164 let r = _mm256_cvtepi32_epi8(a);
50165 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
50166 assert_eq_m128i(r, e);
50167 }
50168
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_epi8() {
        // Writemask byte narrowing of 8 lanes; upper 8 result bytes stay zero.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50179
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing of 8 lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50189
50190 #[simd_test(enable = "avx512f,avx512vl")]
50191 fn test_mm_cvtepi32_epi8() {
50192 let a = _mm_set_epi32(4, 5, 6, 7);
50193 let r = _mm_cvtepi32_epi8(a);
50194 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
50195 assert_eq_m128i(r, e);
50196 }
50197
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi8() {
        // Writemask byte narrowing of 4 lanes; only low 4 mask bits are used.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50208
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi8() {
        // Zeromask byte narrowing of 4 lanes.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50218
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi16() {
        // Signed-saturating narrowing: out-of-range i32::MIN/i32::MAX clamp
        // to i16::MIN/i16::MAX instead of wrapping.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50238
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation: unselected lanes keep `src` (-1).
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50261
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }
50283
50284 #[simd_test(enable = "avx512f,avx512vl")]
50285 fn test_mm256_cvtsepi32_epi16() {
50286 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
50287 let r = _mm256_cvtsepi32_epi16(a);
50288 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
50289 assert_eq_m128i(r, e);
50290 }
50291
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50302
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50312
50313 #[simd_test(enable = "avx512f,avx512vl")]
50314 fn test_mm_cvtsepi32_epi16() {
50315 let a = _mm_set_epi32(4, 5, 6, 7);
50316 let r = _mm_cvtsepi32_epi16(a);
50317 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
50318 assert_eq_m128i(r, e);
50319 }
50320
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi16() {
        // Writemask + signed saturation of 4 lanes (extra mask bits ignored).
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50331
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi16() {
        // Zeromask + signed saturation of 4 lanes.
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
50341
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi8() {
        // Signed-saturating byte narrowing: i32::MIN/MAX clamp to i8::MIN/MAX.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50361
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation: unselected lanes keep `src`.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50384
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
50406
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi8() {
        // Signed byte saturation of 8 in-range lanes; upper 8 bytes zeroed.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50420
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50437
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50453
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi8() {
        // Signed byte saturation of 4 in-range lanes; top 12 bytes zeroed.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50467
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi8() {
        // Writemask + signed byte saturation of 4 lanes.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50484
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi8() {
        // Zeromask + signed byte saturation of 4 lanes.
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }
50500
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi16() {
        // Unsigned saturation: i32::MIN reinterpreted as u32 (0x8000_0000)
        // exceeds u16::MAX and clamps to 0xFFFF, which prints as -1 in i16.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50514
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation: unselected lanes keep `src` (-1);
        // i32::MIN saturates to 0xFFFF (also -1) in the selected lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50531
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }
50547
50548 #[simd_test(enable = "avx512f,avx512vl")]
50549 fn test_mm256_cvtusepi32_epi16() {
50550 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
50551 let r = _mm256_cvtusepi32_epi16(a);
50552 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
50553 assert_eq_m128i(r, e);
50554 }
50555
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50566
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation over 8 in-range lanes.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50576
50577 #[simd_test(enable = "avx512f,avx512vl")]
50578 fn test_mm_cvtusepi32_epi16() {
50579 let a = _mm_set_epi32(5, 6, 7, 8);
50580 let r = _mm_cvtusepi32_epi16(a);
50581 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
50582 assert_eq_m128i(r, e);
50583 }
50584
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi16() {
        // Writemask + unsigned saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50595
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi16() {
        // Zeromask + unsigned saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
50605
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi8() {
        // Unsigned byte saturation: i32::MIN as u32 exceeds u8::MAX and
        // clamps to 0xFF, which prints as -1 in i8.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50619
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation: unselected lanes keep `src`.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50636
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation: unselected lanes become 0.
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
50652
50653 #[simd_test(enable = "avx512f,avx512vl")]
50654 fn test_mm256_cvtusepi32_epi8() {
50655 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
50656 let r = _mm256_cvtusepi32_epi8(a);
50657 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
50658 assert_eq_m128i(r, e);
50659 }
50660
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation; i32::MAX clamps to u8::MAX.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50671
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation; i32::MAX clamps to u8::MAX.
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50681
50682 #[simd_test(enable = "avx512f,avx512vl")]
50683 fn test_mm_cvtusepi32_epi8() {
50684 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
50685 let r = _mm_cvtusepi32_epi8(a);
50686 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
50687 assert_eq_m128i(r, e);
50688 }
50689
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi8() {
        // Writemask + unsigned byte saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50700
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi8() {
        // Zeromask + unsigned byte saturation of 4 lanes.
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
50710
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epi32() {
        // Explicit rounding control: TO_NEAREST_INT rounds halves to even
        // (-1.5 -> -2, 9.5 -> 10); TO_NEG_INF rounds toward negative infinity
        // (9.5 -> 9). NO_EXC suppresses floating-point exceptions (SAE).
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50723
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epi32() {
        // Writemask variant: only the low 8 lanes are converted; the rest
        // are copied from `src` (all zeros here).
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50742
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epi32() {
        // Zeromask variant: unselected lanes are zeroed.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50759
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epu32() {
        // Unsigned conversion: negative inputs are out of range and produce
        // the all-ones pattern 0xFFFF_FFFF, shown here as -1.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
50772
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epu32() {
        // Writemask unsigned conversion; out-of-range negatives yield -1.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50791
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epu32() {
        // Zeromask unsigned conversion; unselected lanes become 0.
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
50808
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepi32_ps() {
        // i32 -> f32: every input here is exactly representable, so the
        // rounding mode has no visible effect.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50818
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepi32_ps() {
        // Writemask i32 -> f32: unselected lanes keep `src` (0.0).
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50837
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepi32_ps() {
        // Zeromask i32 -> f32: unselected lanes become 0.0.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50854
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepu32_ps() {
        // Bits are interpreted as u32: -2 is 4294967294, whose nearest f32 is
        // 2^32; the literal 4294967300. denotes that same f32 value.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            8., 10., 10., 12.,
            12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
50868
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepu32_ps() {
        // Writemask u32 -> f32: unselected lanes keep `src` (0.0).
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50891
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepu32_ps() {
        // Zeromask u32 -> f32: unselected lanes become 0.0.
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
50912
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_ph() {
        // 1.0f32 -> f16 bit pattern 0x3C00; four f16s per 64-bit lane give
        // 0x3C00_3C00_3C00_3C00 = 4323521613979991040.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }
50925
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16: low 8 mask bits fill the two low 64-bit
        // lanes; the rest come from `src` (zero).
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50936
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16: unselected half-words are zeroed.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
50946
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16 on 8 lanes packed into a 128-bit result.
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
50957
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16 on 8 lanes.
        let a = _mm256_set1_ps(1.);
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
50967
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvt_roundps_ph() {
        // Writemask f32 -> f16 on 4 lanes; only the low 64 bits are used.
        let a = _mm_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
50978
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvt_roundps_ph() {
        // Zeromask f32 -> f16 on 4 lanes.
        let a = _mm_set1_ps(1.);
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
50988
50989 #[simd_test(enable = "avx512f")]
50990 fn test_mm512_cvtps_ph() {
50991 let a = _mm512_set1_ps(1.);
50992 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
50993 let e = _mm256_setr_epi64x(
50994 4323521613979991040,
50995 4323521613979991040,
50996 4323521613979991040,
50997 4323521613979991040,
50998 );
50999 assert_eq_m256i(r, e);
51000 }
51001
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_ph() {
        // Writemask f32 -> f16 (default rounding via const arg NO_EXC).
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
51012
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_ph() {
        // Zeromask f32 -> f16: unselected half-words are zeroed.
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }
51022
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_ph() {
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        // Zero mask: merge-masking copies `src`.
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        // All 8 lanes converted: each i64 holds four f16 1.0 values (0x3C00).
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
51033
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_ph() {
        let a = _mm256_set1_ps(1.);
        // Zero mask: result is all zeros.
        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: eight f16 1.0 values (0x3C00) across the two i64 lanes.
        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }
51043
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_ph() {
        let a = _mm_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        // Zero mask: merge-masking copies `src`.
        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        // Four source floats -> four f16 1.0 values in the low i64; high i64 stays zero.
        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
51054
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_ph() {
        let a = _mm_set1_ps(1.);
        // Zero mask: result is all zeros.
        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Four f16 1.0 values (0x3C00) in the low i64; high i64 zero.
        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }
51064
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundph_ps() {
        // 0x3C003C003C003C00 per i64: sixteen packed f16 1.0 values in total.
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Widening f16 -> f32 is exact: every lane becomes f32 1.0.
        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
51077
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512(r, src);
        // Low 8 lanes converted to 1.0, upper 8 taken from src (0.0).
        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51095
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Zero mask: zero-masking yields all zeros.
        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low 8 lanes converted to 1.0, upper 8 zeroed by the mask.
        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51112
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C003C003C003C00 per i64 lane).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // f16 -> f32 widening is exact: all sixteen lanes become 1.0.
        let r = _mm512_cvtph_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }
51125
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtph_ps(src, 0, a);
        assert_eq_m512(r, src);
        // Low 8 lanes converted to 1.0, upper 8 copied from src (0.0).
        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51143
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtph_ps() {
        // Sixteen packed f16 1.0 values (0x3C00 each).
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        // Zero mask: result is all zeros.
        let r = _mm512_maskz_cvtph_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        // Low 8 lanes converted to 1.0, upper 8 zeroed by the mask.
        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
51160
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtph_ps() {
        // Eight packed f16 1.0 values (0x3C003C003C003C00 per i64).
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        let src = _mm256_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvtph_ps(src, 0, a);
        assert_eq_m256(r, src);
        // Full mask: all eight lanes converted to 1.0.
        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }
51171
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtph_ps() {
        // Eight packed f16 1.0 values.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvtph_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        // Full mask: all eight lanes converted to 1.0.
        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        assert_eq_m256(r, e);
    }
51181
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtph_ps() {
        // Only the low 64 bits (four f16 1.0 values) are consumed for a 128-bit result.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        let src = _mm_set1_ps(0.);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvtph_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
        let e = _mm_setr_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
51192
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtph_ps() {
        // Only the low 64 bits (four f16 1.0 values) are consumed.
        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        // Zero mask: result is all zeros.
        let r = _mm_maskz_cvtph_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtph_ps(0b00001111, a);
        let e = _mm_setr_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
51202
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
        // Truncation rounds toward zero: -1.5 -> -1, 9.5 -> 9, etc.
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51212
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512i(r, src);
        // Low 8 lanes truncated toward zero; upper 8 copied from src (0).
        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51225
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: zero-masking yields all zeros.
        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 8 lanes truncated toward zero; upper 8 zeroed.
        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51237
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
        // Unsigned truncation: negative inputs are out of range and produce
        // the all-ones sentinel (u32::MAX, shown here as -1 in i32 lanes).
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51247
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m512i(r, src);
        // Negative inputs saturate to u32::MAX (-1 as i32); upper 8 lanes from src.
        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51260
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Negative inputs saturate to u32::MAX (-1 as i32); upper 8 lanes zeroed.
        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51272
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epi32(a);
        // Truncation toward zero: -1.5 -> -1, 9.5 -> 9, etc.
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51282
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvttps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        // Low 8 lanes truncated; upper 8 copied from src (0).
        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51295
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvttps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low 8 lanes truncated; upper 8 zeroed.
        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51307
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvttps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvttps_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        // Full mask: each lane truncated toward zero (9.5 -> 9, etc.).
        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51318
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvttps_epi32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvttps_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51328
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvttps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvttps_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51339
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvttps_epi32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm_maskz_cvttps_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: truncation toward zero in each lane.
        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51349
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epu32(a);
        // Unsigned truncation: negatives are out of range -> u32::MAX (-1 as i32).
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
51359
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm512_mask_cvttps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        // Negatives saturate to u32::MAX (-1 as i32); upper 8 lanes from src.
        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51372
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        // Zero mask: all zeros out.
        let r = _mm512_maskz_cvttps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Negatives saturate to u32::MAX (-1 as i32); upper 8 lanes zeroed.
        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
51384
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // All inputs non-negative: plain truncation toward zero.
        let r = _mm256_cvttps_epu32(a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51392
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm256_mask_cvttps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51403
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvttps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm256_maskz_cvttps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: truncation toward zero in each lane.
        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
51413
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // All inputs non-negative: plain truncation toward zero.
        let r = _mm_cvttps_epu32(a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51421
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        // Zero mask: merge-masking returns `src`.
        let r = _mm_mask_cvttps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        // Full mask: truncation toward zero in each lane.
        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51432
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvttps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        // Zero mask: all zeros out.
        let r = _mm_maskz_cvttps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Full mask: truncation toward zero in each lane.
        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }
51442
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32gather_ps() {
        // arr[i] == i as f32, so a gather at index i must read back exactly i.
        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A multiplier of 4 is word-addressing
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        let r = unsafe { _mm512_i32gather_ps::<4>(index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
                                         120., 128., 136., 144., 152., 160., 168., 176.));
    }
51455
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32gather_ps() {
        // arr[i] == i as f32; masked-off lanes must come from `src` (2.0).
        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        let src = _mm512_set1_ps(2.);
        // Alternating mask: only odd lanes (bit set) perform a gather.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        // A multiplier of 4 is word-addressing
        let r = unsafe { _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
                                         2., 128., 2., 144., 2., 160., 2., 176.));
    }
51470
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32gather_epi32() {
        // arr[i] == i, so a gather at index i must read back exactly i.
        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A multiplier of 4 is word-addressing
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      120, 128, 136, 144, 152, 160, 168, 176);
        let r = unsafe { _mm512_i32gather_epi32::<4>(index, arr.as_ptr()) };
        #[rustfmt::skip]
        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                             120, 128, 136, 144, 152, 160, 168, 176));
    }
51483
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32gather_epi32() {
        // arr[i] == i; masked-off lanes must come from `src` (2).
        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        let src = _mm512_set1_epi32(2);
        // Alternating mask: only odd lanes (bit set) perform a gather.
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        // A multiplier of 4 is word-addressing
        let r = unsafe { _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr()) };
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
        );
    }
51499
    #[simd_test(enable = "avx512f")]
    fn test_mm512_i32scatter_ps() {
        let mut arr = [0f32; 256];
        // Indices stride by 16 elements so scatters never overlap.
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
        }
        // Lane i (value i+1) lands at arr[i * 16]; every other slot stays zero.
        let mut expected = [0f32; 256];
        for i in 0..16 {
            expected[i * 16] = (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51519
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32scatter_ps() {
        let mut arr = [0f32; 256];
        // Alternating mask: only odd lanes (bit set) are stored.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
        }
        // Odd lane 2i+1 holds value 2(i+1) and scatters to arr[(2i+1) * 16].
        let mut expected = [0f32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2. * (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51540
51541 #[simd_test(enable = "avx512f")]
51542 fn test_mm512_i32scatter_epi32() {
51543 let mut arr = [0i32; 256];
51544 #[rustfmt::skip]
51545
51546 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51547 128, 144, 160, 176, 192, 208, 224, 240);
51548 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51549 // A multiplier of 4 is word-addressing
51550 unsafe {
51551 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
51552 }
51553 let mut expected = [0i32; 256];
51554 for i in 0..16 {
51555 expected[i * 16] = (i + 1) as i32;
51556 }
51557 assert_eq!(&arr[..], &expected[..],);
51558 }
51559
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_i32scatter_epi32() {
        let mut arr = [0i32; 256];
        // Alternating mask: only odd lanes (bit set) are stored.
        let mask = 0b10101010_10101010;
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A multiplier of 4 is word-addressing
        unsafe {
            _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
        }
        // Odd lane 2i+1 holds value 2(i+1) and scatters to arr[(2i+1) * 16].
        let mut expected = [0i32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2 * (i + 1) as i32;
        }
        assert_eq!(&arr[..], &expected[..],);
    }
51578
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmplt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmplt_ps_mask(a, b);
        // Only f32::MIN and -100. are < -1.; NaN compares false.
        // Note `_mm512_set_ps` lists lanes high-to-low, so bit 0 is the last argument.
        assert_eq!(m, 0b00000101_00000101);
    }
51588
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmplt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
        // Result is (cmplt result) AND mask: unmasked LT bits are 0b00000101 per half.
        assert_eq!(r, 0b00000100_00000100);
    }
51599
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpnlt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // NLT (not-less-than, unordered) is the exact bitwise complement of LT:
        // NaN lanes are false for LT and true for NLT.
        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
    }
51608
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpnlt_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // This mask selects exactly the NLT-true lanes, so the result equals the mask.
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
    }
51618
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpnle_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Tests b NLE a: true where -1. > a or a is NaN (unordered compares true for NLE).
        let m = _mm512_cmpnle_ps_mask(b, a);
        assert_eq!(m, 0b00001101_00001101);
    }
51628
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpnle_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (b NLE a) AND mask.
        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }
51639
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmple_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // a <= -1. holds for -1., f32::MIN and -100.; NaN compares false (ordered LE).
        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
    }
51648
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmple_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01111010_01111010;
        // Result is (a LE b) AND mask: 0b00100101 & 0b01111010 = 0b00100000 per half.
        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
    }
51658
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpeq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpeq_ps_mask(b, a);
        // Equal lanes set; NaN == NaN is false (ordered EQ).
        assert_eq!(m, 0b11001101_11001101);
    }
51670
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpeq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        // Result is (b EQ a) AND mask: 0b11001101 & 0b01111010 = 0b01001000 per half.
        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
        assert_eq!(r, 0b01001000_01001000);
    }
51683
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpneq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let m = _mm512_cmpneq_ps_mask(b, a);
        // NEQ (unordered) is true for differing lanes AND for NaN lanes.
        assert_eq!(m, 0b00110010_00110010);
    }
51695
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpneq_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
        let mask = 0b01111010_01111010;
        // All NEQ-true lanes happen to fall inside the mask, so result == NEQ result.
        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }
51708
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Generic compare with explicit predicate: _CMP_LT_OQ (ordered, quiet LT).
        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51718
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (a LT b) AND mask.
        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51729
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cmp_ps_mask() {
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        // Only f32::MIN and -100. are < -1.
        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00000101);
    }
51737
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cmp_ps_mask() {
        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm256_set1_ps(-1.);
        let mask = 0b01100110;
        // Result is (a LT b) AND mask: 0b00000101 & 0b01100110 = 0b00000100.
        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }
51746
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cmp_ps_mask() {
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        // 0. and -1. are < 1. (lanes 3 and 1 of _mm_set_ps, i.e. mask bits 3 and 1).
        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
        assert_eq!(m, 0b00001010);
    }
51754
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cmp_ps_mask() {
        let a = _mm_set_ps(0., 1., -1., 13.);
        let b = _mm_set1_ps(1.);
        // All-ones mask: masked compare degenerates to the plain compare.
        let mask = 0b11111111;
        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
51763
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmp_round_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        // Same expectation as the non-round variant; SAE argument does not change results.
        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51773
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmp_round_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        // Result is (a LT b) AND mask; SAE argument does not change results.
        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }
51784
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        // ORD is true only where BOTH operands are non-NaN.
        let m = _mm512_cmpord_ps_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
51796
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cmpord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        let mask = 0b11000011_11000011;
        // Result is (both-non-NaN) AND mask: 0b00000101 & 0b11000011 = 0b00000001 per half.
        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00000001_00000001);
    }
51809
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cmpunord_ps_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
        #[rustfmt::skip]
        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
        // UNORD is true where EITHER operand is NaN — the complement of cmpord.
        let m = _mm512_cmpunord_ps_mask(a, b);

        assert_eq!(m, 0b11111010_11111010);
    }
51822
51823 #[simd_test(enable = "avx512f")]
51824 fn test_mm512_mask_cmpunord_ps_mask() {
51825 #[rustfmt::skip]
51826 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51827 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51828 #[rustfmt::skip]
51829 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51830 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
51831 let mask = 0b00001111_00001111;
51832 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
51833 assert_eq!(m, 0b000001010_00001010);
51834 }
51835
    // Scalar single-precision (`ss`) compare tests: only the lowest lane
    // participates, so the resulting mask is at most 1 bit wide and only
    // bit 0 of any writemask matters.

    #[simd_test(enable = "avx512f")]
    // 2. >= 1. in the low lane -> mask bit 0 set.
    fn test_mm_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Writemask 0b10 leaves bit 0 disabled (result 0); 0b1 enables it.
    fn test_mm_mask_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Same compare with an explicit (current-direction) rounding argument.
    fn test_mm_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Rounding + writemask variant; same masking behavior as the non-round test.
    fn test_mm_mask_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51871
    // Scalar double-precision (`sd`) compare tests: mirror of the `ss` tests
    // above — only the low lane is compared, producing a 1-bit mask.

    #[simd_test(enable = "avx512f")]
    // 2. >= 1. in the low lane -> mask bit 0 set.
    fn test_mm_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Writemask 0b10 leaves bit 0 disabled (result 0); 0b1 enables it.
    fn test_mm_mask_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Same compare with an explicit (current-direction) rounding argument.
    fn test_mm_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    // Rounding + writemask variant; same masking behavior as the non-round test.
    fn test_mm_mask_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
        assert_eq!(m, 1);
    }
51907
    // Unsigned less-than tests. Note: the first argument of `_mm512_set_epi32`
    // lands in the HIGHEST lane, and mask bit i corresponds to lane i, so the
    // expected bitmasks read right-to-left relative to the argument list.
    // With b = -1 (0xFFFFFFFF = u32::MAX unsigned), every lane is `<` except
    // those that are themselves u32::MAX (-1 and `u32::MAX as i32`).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epu32_mask(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors, with a writemask ANDed into the compare result.
    const fn test_mm512_mask_cmplt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit variant: only the 0 lane (highest argument) is < 1 unsigned.
    const fn test_mm256_cmplt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b10000000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b10000000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit variant: only the 0 lane is < 1 unsigned.
    const fn test_mm_cmplt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmplt_epu32_mask(a, b);
        assert_eq!(r, 0b00001000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
51962
    // Unsigned greater-than tests. The 512-bit cases reuse the `cmplt` vectors
    // with the operands swapped (b > a <=> a < b), so the expected masks match
    // the corresponding `cmplt_epu32` tests above.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: every lane except the two highest (0 and 1) is > 1 unsigned.
    const fn test_mm256_cmpgt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00111111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00111111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: lanes holding 2 and u32::MAX are > 1 unsigned.
    const fn test_mm_cmpgt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpgt_epu32_mask(a, b);
        assert_eq!(r, 0b00000011);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000011);
    }
52017
    // Unsigned less-than-or-equal tests.

    #[simd_test(enable = "avx512f")]
    // `<=` must be the exact complement of `>` for the same operands.
    const fn test_mm512_cmple_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epu32_mask(a, b),
            !_mm512_cmpgt_epu32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    // b is u32::MAX in every lane, so `a <= b` is all-ones and the masked
    // result equals the writemask itself.
    const fn test_mm512_mask_cmple_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmple_epu32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: only lanes holding 0 and 1 (the two highest) are <= 1 unsigned.
    const fn test_mm256_cmple_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b11000000)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b11000000)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: lanes holding 0 and 1 are <= 1 unsigned.
    const fn test_mm_cmple_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmple_epu32_mask(a, b);
        assert_eq!(r, 0b00001100)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00001100)
    }
52076
    #[simd_test(enable = "avx512f")]
    // Unsigned `>=` must be the exact complement of `<` for the same operands.
    const fn test_mm512_cmpge_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epu32_mask(a, b),
            !_mm512_cmplt_epu32_mask(a, b)
        )
    }
52088
52089 #[simd_test(enable = "avx512f")]
52090 const fn test_mm512_mask_cmpge_epu32_mask() {
52091 #[rustfmt::skip]
52092 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52093 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52094 let b = _mm512_set1_epi32(-1);
52095 let mask = 0b01111010_01111010;
52096 assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b01100000_0110000);
52097 }
52098
    // 256-/128-bit unsigned `>=` tests against splat(1): every lane except
    // the one holding 0 (the highest `set` argument) compares >=.

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b01111111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
        let b = _mm256_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01111111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpge_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let r = _mm_cmpge_epu32_mask(a, b);
        assert_eq!(r, 0b00000111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epu32_mask() {
        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
        assert_eq!(r, 0b00000111)
    }
52132
    // Unsigned equality tests: `a` and `b` agree everywhere except the lanes
    // where (-1, u32::MAX) is paired against (13, 42).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors with a writemask ANDed into the equality result.
    const fn test_mm512_mask_cmpeq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52191
    // Unsigned inequality tests.

    #[simd_test(enable = "avx512f")]
    // `!=` must be the exact complement of `==` for the same operands.
    const fn test_mm512_cmpneq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epu32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    // `a` additionally swaps 100/-100 relative to `b`, so those lanes also
    // differ; the writemask then filters the combined result.
    const fn test_mm512_mask_cmpneq_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit: only the (-1, u32::MAX) vs (13, 42) lanes differ.
    const fn test_mm256_cmpneq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let r = _mm256_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00110000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpneq_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epu32_mask(b, a);
        assert_eq!(r, 0b00000011);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00000011);
    }
52250
    // Generic unsigned compare with an explicit predicate: `_MM_CMPINT_LT`
    // must reproduce the dedicated `cmplt_epu32` results.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epu32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epu32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit: only the lane holding 0 is < 1 unsigned (-1 is u32::MAX).
    const fn test_mm_cmp_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001000);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epu32_mask() {
        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001000);
    }
52305
    // Signed less-than tests against splat(-1): only i32::MIN and -100
    // compare below -1 in signed order.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmplt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epi32_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmplt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmplt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmplt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmplt_epi32_mask() {
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmplt_epi32_mask(a, b);
        assert_eq!(r, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmplt_epi32_mask() {
        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000101);
    }
52360
    // Signed greater-than tests. The 512-bit cases swap operands so that
    // `b > a` mirrors the `cmplt_epi32` expectations.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epi32_mask(b, a);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmpgt_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 256-bit, direct `a > b`: every lane above -1 in signed order is set.
    const fn test_mm256_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11011010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpgt_epi32_mask(a, b);
        assert_eq!(r, 0b00001101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpgt_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001101);
    }
52415
    #[simd_test(enable = "avx512f")]
    // Signed `<=` must be the exact complement of `>` for the same operands.
    const fn test_mm512_cmple_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmple_epi32_mask(a, b),
            !_mm512_cmpgt_epi32_mask(a, b)
        )
    }
52427
52428 #[simd_test(enable = "avx512f")]
52429 const fn test_mm512_mask_cmple_epi32_mask() {
52430 #[rustfmt::skip]
52431 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52432 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52433 let b = _mm512_set1_epi32(-1);
52434 let mask = 0b01111010_01111010;
52435 assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b01100000_0110000);
52436 }
52437
    // 256-/128-bit signed `<=` tests against splat(-1): only -1, i32::MIN and
    // -100 lanes compare <=.

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmple_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00100101)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmple_epi32_mask(a, b);
        assert_eq!(r, 0b00000010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmple_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 200);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000010)
    }
52471
    // Signed greater-than-or-equal tests.

    #[simd_test(enable = "avx512f")]
    // Signed `>=` must be the exact complement of `<` for the same operands.
    const fn test_mm512_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        assert_eq!(
            _mm512_cmpge_epi32_mask(a, b),
            !_mm512_cmplt_epi32_mask(a, b)
        )
    }

    #[simd_test(enable = "avx512f")]
    // Every lane the writemask enables is >= -1 here, so the masked result
    // equals the writemask itself.
    const fn test_mm512_mask_cmpge_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(
            _mm512_mask_cmpge_epi32_mask(mask, a, b),
            0b01111010_01111010
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpge_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b11111010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // All four lanes (0, 1, -1, -1) are >= -1 in signed order.
    const fn test_mm_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let r = _mm_cmpge_epi32_mask(a, b);
        assert_eq!(r, 0b00001111)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpge_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
        let b = _mm_set1_epi32(-1);
        let mask = 0b11111111;
        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00001111)
    }
52530
    // Signed equality tests: `a` and `b` agree everywhere except the lanes
    // where (-1, 13) is paired against (13, 42).

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    // Same vectors with a writemask ANDed into the equality result.
    const fn test_mm512_mask_cmpeq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b11001111);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010;
        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b01001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let m = _mm_cmpeq_epi32_mask(b, a);
        assert_eq!(m, 0b00001100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpeq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00001100);
    }
52589
    // Signed inequality tests.

    #[simd_test(enable = "avx512f")]
    // `!=` must be the exact complement of `==` for the same operands.
    const fn test_mm512_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm512_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    // `a` additionally swaps 100/-100 relative to `b`, so those lanes also
    // differ; the writemask then filters the combined result.
    const fn test_mm512_mask_cmpneq_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        #[rustfmt::skip]
        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let m = _mm256_cmpneq_epi32_mask(b, a);
        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // Differing lanes: (-1, 13) vs (13, 42) and the swapped 100/-100 pair.
    const fn test_mm256_mask_cmpneq_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
        let mask = 0b11111111;
        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00110011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let r = _mm_cmpneq_epi32_mask(b, a);
        assert_eq!(r, 0b00000011)
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmpneq_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set_epi32(0, 1, 13, 42);
        let mask = 0b11111111;
        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000011)
    }
52648
    // Generic signed compare with an explicit predicate: `_MM_CMPINT_LT`
    // must reproduce the dedicated `cmplt_epi32` results.

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cmp_epi32_mask() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00000101);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cmp_epi32_mask() {
        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
        let b = _mm256_set1_epi32(-1);
        let mask = 0b01100110;
        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00000100);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    // 128-bit vs splat(1): lanes holding 0 and -1 are < 1 in signed order.
    const fn test_mm_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b00001010);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cmp_epi32_mask() {
        let a = _mm_set_epi32(0, 1, -1, 13);
        let b = _mm_set1_epi32(1);
        let mask = 0b11111111;
        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b00001010);
    }
52703
    // `set1_epi8` must equal `set_epi8` with the same value in all 64 byte lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi8() {
        let r = _mm512_set1_epi8(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi8(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2, 2, 2, 2, 2,
            ),
        )
    }

    // `set1_epi16` must equal `set_epi16` with the same value in all 32 word lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi16() {
        let r = _mm512_set1_epi16(2);
        assert_eq_m512i(
            r,
            _mm512_set_epi16(
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                2, 2, 2, 2,
            ),
        )
    }

    // `set_epi32` takes lanes high-to-low, `setr_epi32` low-to-high; reversed
    // argument lists must therefore produce identical vectors.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_epi32() {
        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // Mirror of the previous test: `setr` of reversed args equals `set`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_epi32() {
        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
        )
    }

    // Inverse direction of test_mm512_set_epi8: explicit 64-lane set equals set1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi8() {
        let r = _mm512_set_epi8(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi8(2));
    }

    // Inverse direction of test_mm512_set_epi16: explicit 32-lane set equals set1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi16() {
        let r = _mm512_set_epi16(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        assert_eq_m512i(r, _mm512_set1_epi16(2));
    }

    // Explicit 16-lane set of a constant equals `set1_epi32`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_epi32() {
        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, _mm512_set1_epi32(2));
    }

    // All-zero integer vector equals broadcast zero.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_si512() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
    }

    // `setzero_epi32` is the same all-zero vector as broadcast zero.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_epi32() {
        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
    }

    // `set_ps` (high-to-low) of reversed args equals `setr_ps` (low-to-high).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_ps() {
        let r = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_set_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // Mirror of the previous test for `setr_ps`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_ps() {
        let r = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
            ),
        )
    }

    // Broadcast float: `set1_ps(2.)` fills all 16 lanes with 2.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set1_ps() {
        #[rustfmt::skip]
        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
                                     2., 2., 2., 2., 2., 2., 2., 2.);
        assert_eq_m512(expected, _mm512_set1_ps(2.));
    }

    // `set4_epi32(d, c, b, a)` repeats the 4-lane pattern across all four 128-bit lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
    }

    // Float variant of the repeated 4-lane pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
    }

    // `setr4` takes the 4-lane pattern in low-to-high order, so the same vector
    // as `set4(4, 3, 2, 1)` is produced by `setr4(1, 2, 3, 4)`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_epi32() {
        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
    }

    // Float variant of the low-to-high repeated 4-lane pattern.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr4_ps() {
        let r = _mm512_set_ps(
            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
        );
        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
    }

    // All-zero float vector equals broadcast 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero_ps() {
        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
    }

    // `_mm512_setzero` (alias returning __m512) also yields all-zero lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setzero() {
        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
    }
52853
    // Unaligned 512-bit double load: lanes come back in memory (setr) order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_pd() {
        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
        let p = a.as_ptr();
        // black_box keeps the pointer opaque so the load is not const-folded away.
        let r = unsafe { _mm512_loadu_pd(black_box(p)) };
        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
        assert_eq_m512d(r, e);
    }

    // Unaligned 512-bit double store round-trip into an undefined-initialized vector.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_pd() {
        let a = _mm512_set1_pd(9.);
        let mut r = _mm512_undefined_pd();
        unsafe {
            _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
        }
        assert_eq_m512d(r, a);
    }

    // Unaligned 512-bit float load: lanes come back in memory (setr) order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_ps() {
        let a = &[
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        ];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    // Unaligned 512-bit float store round-trip.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
52895
    // Masked unaligned load: lanes whose mask bit is set come from memory,
    // the rest keep the corresponding lane of `src` (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Zero-masked unaligned load: unselected lanes become 0 instead of `src`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned masked load: the source buffer must be 64-byte aligned, hence the
    // repr(align(64)) wrapper.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let src = _mm512_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Aligned zero-masked load: unselected lanes are zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_epi32(m, black_box(p)) };
        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned store: only lanes with set mask bits are written;
    // unselected slots keep their prior value (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi32() {
        let mut r = [42_i32; 16];
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_loadu_epi32(r.as_ptr()) }, e);
    }

    // Aligned masked store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16],
        }
        let mut r = Align { data: [42; 16] };
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
        assert_eq_m512i(unsafe { _mm512_load_epi32(r.data.as_ptr()) }, e);
    }
52977
    // Masked unaligned 64-bit load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_loadu_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Zero-masked unaligned 64-bit load: unselected lanes become 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_loadu_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned masked 64-bit load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let src = _mm512_set1_epi64(42);
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_load_epi64(src, m, black_box(p)) };
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Aligned zero-masked 64-bit load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8], // 64 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_load_epi64(m, black_box(p)) };
        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Masked unaligned 64-bit store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_epi64() {
        let mut r = [42_i64; 8];
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_loadu_epi64(r.as_ptr()) }, e);
    }

    // Aligned masked 64-bit store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_epi64() {
        #[repr(align(64))]
        struct Align {
            data: [i64; 8],
        }
        let mut r = Align { data: [42; 8] };
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        let p = r.data.as_mut_ptr();
        unsafe {
            _mm512_mask_store_epi64(p, m, a);
        }
        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m512i(unsafe { _mm512_load_epi64(r.data.as_ptr()) }, e);
    }
53060
    // Masked unaligned float load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_ps() {
        let src = _mm512_set1_ps(42.0);
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked unaligned float load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_ps() {
        let a = &[
            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
            16.0,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned masked float load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let src = _mm512_set1_ps(42.0);
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_load_ps(src, m, black_box(p)) };
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(r, e);
    }

    // Aligned zero-masked float load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
                15.0, 16.0,
            ],
        };
        let p = a.data.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_load_ps(m, black_box(p)) };
        let e = _mm512_setr_ps(
            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
        );
        assert_eq_m512(r, e);
    }

    // Masked unaligned float store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_ps() {
        let mut r = [42_f32; 16];
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_loadu_ps(r.as_ptr()) }, e);
    }

    // Aligned masked float store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16],
        }
        let mut r = Align { data: [42.0; 16] };
        let a = _mm512_setr_ps(
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        );
        let m = 0b11101000_11001010;
        unsafe {
            _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_ps(
            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
            16.0,
        );
        assert_eq_m512(unsafe { _mm512_load_ps(r.data.as_ptr()) }, e);
    }
53174
    // Masked unaligned double load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_loadu_pd() {
        let src = _mm512_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_loadu_pd(src, m, black_box(p)) };
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Zero-masked unaligned double load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_loadu_pd(m, black_box(p)) };
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Aligned masked double load: buffer must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let src = _mm512_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_mask_load_pd(src, m, black_box(p)) };
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Aligned zero-masked double load.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_load_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8], // 64 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm512_maskz_load_pd(m, black_box(p)) };
        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m512d(r, e);
    }

    // Masked unaligned double store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_storeu_pd() {
        let mut r = [42_f64; 8];
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(unsafe { _mm512_loadu_pd(r.as_ptr()) }, e);
    }

    // Aligned masked double store: destination must be 64-byte aligned.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_store_pd() {
        #[repr(align(64))]
        struct Align {
            data: [f64; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m512d(unsafe { _mm512_load_pd(r.data.as_ptr()) }, e);
    }
53256
    // 256-bit masked unaligned load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked unaligned load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned masked load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let src = _mm256_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned zero-masked load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_epi32() {
        #[repr(align(32))]
        struct Align {
            data: [i32; 8], // 32 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_load_epi32(m, black_box(p)) };
        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit masked unaligned store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_epi32() {
        let mut r = [42_i32; 8];
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
        assert_eq_m256i(unsafe { _mm256_loadu_epi32(r.as_ptr()) }, e);
    }
53322
53323 #[simd_test(enable = "avx512f,avx512vl")]
53324 const fn test_mm256_mask_store_epi32() {
53325 #[repr(align(64))]
53326 struct Align {
53327 data: [i32; 8],
53328 }
53329 let mut r = Align { data: [42; 8] };
53330 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53331 let m = 0b11001010;
53332 unsafe {
53333 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
53334 }
53335 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53336 assert_eq_m256i(unsafe { _mm256_load_epi32(r.data.as_ptr()) }, e);
53337 }
53338
    // 256-bit masked unaligned 64-bit load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm256_mask_loadu_epi64(src, m, black_box(p)) };
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masked unaligned 64-bit load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_loadu_epi64(m, black_box(p)) };
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned masked 64-bit load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let src = _mm256_set1_epi64x(42);
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_mask_load_epi64(src, m, black_box(p)) };
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit aligned zero-masked 64-bit load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4], // 32 bytes
        }
        let a = Align {
            data: [1_i64, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_load_epi64(m, black_box(p)) };
        let e = _mm256_setr_epi64x(0, 2, 0, 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit masked unaligned 64-bit store: unwritten slots keep 42.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_epi64() {
        let mut r = [42_i64; 4];
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        unsafe {
            _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(unsafe { _mm256_loadu_epi64(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked 64-bit store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_epi64() {
        #[repr(align(32))]
        struct Align {
            data: [i64; 4],
        }
        let mut r = Align { data: [42; 4] };
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let m = 0b1010;
        unsafe {
            _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_epi64x(42, 2, 42, 4);
        assert_eq_m256i(unsafe { _mm256_load_epi64(r.data.as_ptr()) }, e);
    }
53420
    // 256-bit masked unaligned float load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_ps() {
        let src = _mm256_set1_ps(42.0);
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_loadu_ps(src, m, black_box(p)) };
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit zero-masked unaligned float load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_ps() {
        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_loadu_ps(m, black_box(p)) };
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit aligned masked float load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let src = _mm256_set1_ps(42.0);
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_mask_load_ps(src, m, black_box(p)) };
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit aligned zero-masked float load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8], // 32 bytes
        }
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        };
        let p = a.data.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm256_maskz_load_ps(m, black_box(p)) };
        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
        assert_eq_m256(r, e);
    }

    // 256-bit masked unaligned float store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_ps() {
        let mut r = [42_f32; 8];
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(unsafe { _mm256_loadu_ps(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked float store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_ps() {
        #[repr(align(32))]
        struct Align {
            data: [f32; 8],
        }
        let mut r = Align { data: [42.0; 8] };
        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let m = 0b11001010;
        unsafe {
            _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
        assert_eq_m256(unsafe { _mm256_load_ps(r.data.as_ptr()) }, e);
    }
53502
    // 256-bit masked unaligned double load: unselected lanes keep `src` (42.0).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_loadu_pd() {
        let src = _mm256_set1_pd(42.0);
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm256_mask_loadu_pd(src, m, black_box(p)) };
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit zero-masked unaligned double load: unselected lanes become 0.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_loadu_pd(m, black_box(p)) };
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit aligned masked double load: buffer must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let src = _mm256_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_mask_load_pd(src, m, black_box(p)) };
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit aligned zero-masked double load.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_load_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4], // 32 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm256_maskz_load_pd(m, black_box(p)) };
        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }

    // 256-bit masked unaligned double store: unwritten slots keep 42.0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_storeu_pd() {
        let mut r = [42_f64; 4];
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        unsafe {
            _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(unsafe { _mm256_loadu_pd(r.as_ptr()) }, e);
    }

    // 256-bit aligned masked double store: destination must be 32-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_store_pd() {
        #[repr(align(32))]
        struct Align {
            data: [f64; 4],
        }
        let mut r = Align { data: [42.0; 4] };
        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        unsafe {
            _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
        }
        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
        assert_eq_m256d(unsafe { _mm256_load_pd(r.data.as_ptr()) }, e);
    }
53584
    // 128-bit masked unaligned load: unselected lanes keep `src` (42).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_loadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        // Bit i of the mask selects element i (lowest bit = lowest lane).
        let m = 0b1010;
        let r = unsafe { _mm_mask_loadu_epi32(src, m, black_box(p)) };
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masked unaligned load: unselected lanes become 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_loadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm_maskz_loadu_epi32(m, black_box(p)) };
        let e = _mm_setr_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }
53605
    // 128-bit aligned masked load: buffer must be 16-byte aligned.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_load_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let src = _mm_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = unsafe { _mm_mask_load_epi32(src, m, black_box(p)) };
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }
53622
53623 #[simd_test(enable = "avx512f,avx512vl")]
53624 const fn test_mm_maskz_load_epi32() {
53625 #[repr(align(16))]
53626 struct Align {
53627 data: [i32; 4], // 16 bytes
53628 }
53629 let a = Align {
53630 data: [1_i32, 2, 3, 4],
53631 };
53632 let p = a.data.as_ptr();
53633 let m = 0b1010;
53634 let r = unsafe { _mm_maskz_load_epi32(m, black_box(p)) };
53635 let e = _mm_setr_epi32(0, 2, 0, 4);
53636 assert_eq_m128i(r, e);
53637 }
53638
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_epi32() {
    // Masked unaligned store: only lanes with a set mask bit overwrite memory;
    // the rest keep the initial 42 fill.
    let mut r = [42_i32; 4];
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let m = 0b1010;
    unsafe {
        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi32(42, 2, 42, 4);
    assert_eq_m128i(unsafe { _mm_loadu_epi32(r.as_ptr()) }, e);
}
53650
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_epi32() {
    // Masked aligned store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i32; 4], // 16 bytes
    }
    let mut r = Align { data: [42; 4] };
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let m = 0b1010;
    unsafe {
        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi32(42, 2, 42, 4);
    assert_eq_m128i(unsafe { _mm_load_epi32(r.data.as_ptr()) }, e);
}
53666
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_epi64() {
    // Masked unaligned 64-bit load: mask 0b10 loads lane 1, lane 0 keeps `src`.
    let src = _mm_set1_epi64x(42);
    let a = &[1_i64, 2];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_loadu_epi64(src, m, black_box(p)) };
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(r, e);
}
53677
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_epi64() {
    // Zero-masked unaligned 64-bit load: lane 0 (mask bit clear) becomes 0.
    let a = &[1_i64, 2];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_loadu_epi64(m, black_box(p)) };
    let e = _mm_setr_epi64x(0, 2);
    assert_eq_m128i(r, e);
}
53687
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_epi64() {
    // Masked aligned 64-bit load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let src = _mm_set1_epi64x(42);
    let a = Align { data: [1_i64, 2] };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_load_epi64(src, m, black_box(p)) };
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(r, e);
}
53702
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_epi64() {
    // Zero-masked aligned 64-bit load.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let a = Align { data: [1_i64, 2] };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_load_epi64(m, black_box(p)) };
    let e = _mm_setr_epi64x(0, 2);
    assert_eq_m128i(r, e);
}
53716
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_epi64() {
    // Masked unaligned 64-bit store: only lane 1 (mask 0b10) is written.
    let mut r = [42_i64; 2];
    let a = _mm_setr_epi64x(1, 2);
    let m = 0b10;
    unsafe {
        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(unsafe { _mm_loadu_epi64(r.as_ptr()) }, e);
}
53728
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_epi64() {
    // Masked aligned 64-bit store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [i64; 2], // 16 bytes
    }
    let mut r = Align { data: [42; 2] };
    let a = _mm_setr_epi64x(1, 2);
    let m = 0b10;
    unsafe {
        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_epi64x(42, 2);
    assert_eq_m128i(unsafe { _mm_load_epi64(r.data.as_ptr()) }, e);
}
53744
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_ps() {
    // Masked unaligned f32 load: mask 0b1010 loads lanes 1 and 3; others keep `src`.
    let src = _mm_set1_ps(42.0);
    let a = &[1.0_f32, 2.0, 3.0, 4.0];
    let p = a.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_mask_loadu_ps(src, m, black_box(p)) };
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(r, e);
}
53755
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_ps() {
    // Zero-masked unaligned f32 load: clear mask bits yield 0.0 lanes.
    let a = &[1.0_f32, 2.0, 3.0, 4.0];
    let p = a.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_maskz_loadu_ps(m, black_box(p)) };
    let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
    assert_eq_m128(r, e);
}
53765
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_ps() {
    // Masked aligned f32 load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let src = _mm_set1_ps(42.0);
    let a = Align {
        data: [1.0_f32, 2.0, 3.0, 4.0],
    };
    let p = a.data.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_mask_load_ps(src, m, black_box(p)) };
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(r, e);
}
53782
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_ps() {
    // Zero-masked aligned f32 load.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let a = Align {
        data: [1.0_f32, 2.0, 3.0, 4.0],
    };
    let p = a.data.as_ptr();
    let m = 0b1010;
    let r = unsafe { _mm_maskz_load_ps(m, black_box(p)) };
    let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
    assert_eq_m128(r, e);
}
53798
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_ps() {
    // Masked unaligned f32 store: only lanes 1 and 3 overwrite the 42.0 fill.
    let mut r = [42_f32; 4];
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let m = 0b1010;
    unsafe {
        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(unsafe { _mm_loadu_ps(r.as_ptr()) }, e);
}
53810
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_ps() {
    // Masked aligned f32 store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f32; 4], // 16 bytes
    }
    let mut r = Align { data: [42.0; 4] };
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let m = 0b1010;
    unsafe {
        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
    assert_eq_m128(unsafe { _mm_load_ps(r.data.as_ptr()) }, e);
}
53826
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_loadu_pd() {
    // Masked unaligned f64 load: mask 0b10 loads lane 1; lane 0 keeps `src`.
    let src = _mm_set1_pd(42.0);
    let a = &[1.0_f64, 2.0];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_loadu_pd(src, m, black_box(p)) };
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(r, e);
}
53837
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_loadu_pd() {
    // Zero-masked unaligned f64 load: lane 0 (mask bit clear) becomes 0.0.
    let a = &[1.0_f64, 2.0];
    let p = a.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_loadu_pd(m, black_box(p)) };
    let e = _mm_setr_pd(0.0, 2.0);
    assert_eq_m128d(r, e);
}
53847
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_load_pd() {
    // Masked aligned f64 load from a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let src = _mm_set1_pd(42.0);
    let a = Align {
        data: [1.0_f64, 2.0],
    };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_mask_load_pd(src, m, black_box(p)) };
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(r, e);
}
53864
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_load_pd() {
    // Zero-masked aligned f64 load.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let a = Align {
        data: [1.0_f64, 2.0],
    };
    let p = a.data.as_ptr();
    let m = 0b10;
    let r = unsafe { _mm_maskz_load_pd(m, black_box(p)) };
    let e = _mm_setr_pd(0.0, 2.0);
    assert_eq_m128d(r, e);
}
53880
#[simd_test(enable = "avx512f")]
fn test_mm_mask_load_ss() {
    // Masked scalar f32 load: mask bit 0 set loads from memory, clear keeps `src`.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let src = _mm_set_ss(2.0);
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_mask_load_ss(src, 0b1, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(1.0));
    let r = unsafe { _mm_mask_load_ss(src, 0b0, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(2.0));
}
53894
#[simd_test(enable = "avx512f")]
fn test_mm_maskz_load_ss() {
    // Zero-masked scalar f32 load: a clear mask bit yields 0.0.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_maskz_load_ss(0b1, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(1.0));
    let r = unsafe { _mm_maskz_load_ss(0b0, &mem.data) };
    assert_eq_m128(r, _mm_set_ss(0.0));
}
53907
#[simd_test(enable = "avx512f")]
fn test_mm_mask_load_sd() {
    // Masked scalar f64 load: mask bit 0 set loads from memory, clear keeps `src`.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let src = _mm_set_sd(2.0);
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_mask_load_sd(src, 0b1, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(1.0));
    let r = unsafe { _mm_mask_load_sd(src, 0b0, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(2.0));
}
53921
#[simd_test(enable = "avx512f")]
fn test_mm_maskz_load_sd() {
    // Zero-masked scalar f64 load: a clear mask bit yields 0.0.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let mem = Align { data: 1.0 };
    let r = unsafe { _mm_maskz_load_sd(0b1, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(1.0));
    let r = unsafe { _mm_maskz_load_sd(0b0, &mem.data) };
    assert_eq_m128d(r, _mm_set_sd(0.0));
}
53934
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_storeu_pd() {
    // Masked unaligned f64 store: only lane 1 (mask 0b10) is written.
    let mut r = [42_f64; 2];
    let a = _mm_setr_pd(1.0, 2.0);
    let m = 0b10;
    unsafe {
        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(unsafe { _mm_loadu_pd(r.as_ptr()) }, e);
}
53946
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_store_pd() {
    // Masked aligned f64 store into a 16-byte-aligned buffer.
    #[repr(align(16))]
    struct Align {
        data: [f64; 2], // 16 bytes
    }
    let mut r = Align { data: [42.0; 2] };
    let a = _mm_setr_pd(1.0, 2.0);
    let m = 0b10;
    unsafe {
        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
    }
    let e = _mm_setr_pd(42.0, 2.0);
    assert_eq_m128d(unsafe { _mm_load_pd(r.data.as_ptr()) }, e);
}
53962
#[simd_test(enable = "avx512f")]
fn test_mm_mask_store_ss() {
    // Masked scalar f32 store: writes only when mask bit 0 is set; the second
    // call (mask 0b0) must leave memory untouched.
    #[repr(align(16))]
    struct Align {
        data: f32,
    }
    let a = _mm_set_ss(2.0);
    let mut mem = Align { data: 1.0 };
    unsafe {
        _mm_mask_store_ss(&mut mem.data, 0b1, a);
    }
    assert_eq!(mem.data, 2.0);
    unsafe {
        _mm_mask_store_ss(&mut mem.data, 0b0, a);
    }
    assert_eq!(mem.data, 2.0);
}
53980
#[simd_test(enable = "avx512f")]
fn test_mm_mask_store_sd() {
    // Masked scalar f64 store: writes only when mask bit 0 is set; the second
    // call (mask 0b0) must leave memory untouched.
    #[repr(align(16))]
    struct Align {
        data: f64,
    }
    let a = _mm_set_sd(2.0);
    let mut mem = Align { data: 1.0 };
    unsafe {
        _mm_mask_store_sd(&mut mem.data, 0b1, a);
    }
    assert_eq!(mem.data, 2.0);
    unsafe {
        _mm_mask_store_sd(&mut mem.data, 0b0, a);
    }
    assert_eq!(mem.data, 2.0);
}
53998
#[simd_test(enable = "avx512f")]
const fn test_mm512_setr_pd() {
    // `set` and `setr` are element-order mirrors of each other.
    let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
54004
#[simd_test(enable = "avx512f")]
const fn test_mm512_set_pd() {
    // Mirror of test_mm512_setr_pd: `setr` reversed equals `set`.
    let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
54010
#[simd_test(enable = "avx512f")]
const fn test_mm512_rol_epi32() {
    // Rotate left by 1: bit 31 wraps around to bit 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_rol_epi32::<1>(a);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54018
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rol_epi32() {
    // Mask 0 keeps the `src` operand unchanged; all-ones mask rotates every lane.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54028
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rol_epi32() {
    // Zero mask yields all zeros; low-half mask rotates the 8 low lanes only.
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_rol_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}
54038
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rol_epi32() {
    // 256-bit rotate left by 1: bit 31 wraps to bit 0.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_rol_epi32::<1>(a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54046
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rol_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54056
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rol_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_maskz_rol_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54066
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rol_epi32() {
    // 128-bit rotate left by 1: bit 31 wraps to bit 0.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_rol_epi32::<1>(a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54074
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rol_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_mask_rol_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54084
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rol_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_maskz_rol_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54094
#[simd_test(enable = "avx512f")]
const fn test_mm512_ror_epi32() {
    // Rotate right by 1: bit 0 wraps around to bit 31.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_ror_epi32::<1>(a);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54102
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_ror_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by 1.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54112
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_ror_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes.
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let r = _mm512_maskz_ror_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
54122
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_ror_epi32() {
    // 256-bit rotate right by 1: bit 0 wraps to bit 31.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_ror_epi32::<1>(a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54130
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_ror_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by 1.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54140
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_ror_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm256_maskz_ror_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54150
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_ror_epi32() {
    // 128-bit rotate right by 1: bit 0 wraps to bit 31.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_ror_epi32::<1>(a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54158
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_ror_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes right by 1.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_mask_ror_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54168
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_ror_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let r = _mm_maskz_ror_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54178
#[simd_test(enable = "avx512f")]
const fn test_mm512_slli_epi32() {
    // Logical shift left by 1: unlike rotate, bit 31 is discarded (lane 0 -> 0).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_slli_epi32::<1>(a);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54186
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_slli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane left by 1.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54196
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_slli_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes
    // (lane 0's 1 << 31 shifts out to 0).
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_slli_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}
54206
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_slli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane left by 1.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54216
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_slli_epi32() {
    // Zero mask yields all zeros; all-ones mask shifts every lane left by 1.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm256_maskz_slli_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54226
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_slli_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 shifts all four lanes left by 1.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_mask_slli_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54236
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_slli_epi32() {
    // Zero mask yields all zeros; mask 0b1111 shifts all four lanes left by 1.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let r = _mm_maskz_slli_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54246
#[simd_test(enable = "avx512f")]
const fn test_mm512_srli_epi32() {
    // Logical shift right by 1; 0 stays 0 (`0 << 31` is just a stylistic 0
    // paralleling the rotate tests' `1 << 31`).
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_srli_epi32::<1>(a);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54254
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_srli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane right by 1.
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54264
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_srli_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes right by 1.
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
    let r = _mm512_maskz_srli_epi32::<1>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
    assert_eq_m512i(r, e);
}
54274
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_srli_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts lane 7's 1 << 5 down to 1 << 4.
    let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
    let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}
54284
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_srli_epi32() {
    // Zero mask yields all zeros; all-ones mask shifts every lane right by 1.
    let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm256_maskz_srli_epi32::<1>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
    let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}
54294
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_srli_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 shifts all four lanes right by 1.
    let a = _mm_set_epi32(1 << 5, 0, 0, 0);
    let r = _mm_mask_srli_epi32::<1>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
    let e = _mm_set_epi32(1 << 4, 0, 0, 0);
    assert_eq_m128i(r, e);
}
54304
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_srli_epi32() {
    // Zero mask yields all zeros; mask 0b1111 shifts all four lanes right by 1.
    let a = _mm_set_epi32(1 << 5, 0, 0, 0);
    let r = _mm_maskz_srli_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1 << 4, 0, 0, 0);
    assert_eq_m128i(r, e);
}
54314
#[simd_test(enable = "avx512f")]
const fn test_mm512_rolv_epi32() {
    // Variable rotate left: per-lane count from `b` (all 1 here); bit 31 wraps to bit 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_rolv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54323
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rolv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane by its count in `b`.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54334
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rolv_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes
    // (the high lane's 1 << 31 is masked off to 0).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_maskz_rolv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}
54345
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rolv_epi32() {
    // 256-bit variable rotate left by per-lane counts (all 1 here).
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_rolv_epi32(a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54354
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rolv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54365
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rolv_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_maskz_rolv_epi32(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m256i(r, e);
}
54376
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rolv_epi32() {
    // 128-bit variable rotate left by per-lane counts (all 1 here).
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_rolv_epi32(a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54385
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rolv_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54396
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rolv_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 31, 1, 1, 1);
    let b = _mm_set1_epi32(1);
    let r = _mm_maskz_rolv_epi32(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
    let e = _mm_set_epi32(1 << 0, 2, 2, 2);
    assert_eq_m128i(r, e);
}
54407
#[simd_test(enable = "avx512f")]
const fn test_mm512_rorv_epi32() {
    // Variable rotate right: per-lane count from `b`; bit 0 wraps to bit 31.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_rorv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54416
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_rorv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane right by its count.
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}
54427
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_rorv_epi32() {
    // Zero mask yields all zeros; low-half mask rotates only the 8 low lanes
    // (the high lane's 3 is masked off to 0).
    let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let b = _mm512_set1_epi32(1);
    let r = _mm512_maskz_rorv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
54438
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_rorv_epi32() {
    // 256-bit variable rotate right by per-lane counts (all 1 here).
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_rorv_epi32(a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54447
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_rorv_epi32() {
    // Mask 0 keeps `src`; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54458
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_rorv_epi32() {
    // Zero mask yields all zeros; all-ones mask rotates every lane.
    let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm256_set1_epi32(1);
    let r = _mm256_maskz_rorv_epi32(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
    let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
54469
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_rorv_epi32() {
    // 128-bit variable rotate right by per-lane counts (all 1 here).
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_rorv_epi32(a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54478
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_rorv_epi32() {
    // Mask 0 keeps `src`; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54489
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_rorv_epi32() {
    // Zero mask yields all zeros; mask 0b1111 rotates all four lanes.
    let a = _mm_set_epi32(1 << 0, 2, 2, 2);
    let b = _mm_set1_epi32(1);
    let r = _mm_maskz_rorv_epi32(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
    let e = _mm_set_epi32(1 << 31, 1, 1, 1);
    assert_eq_m128i(r, e);
}
54500
#[simd_test(enable = "avx512f")]
const fn test_mm512_sllv_epi32() {
    // Variable shift left: per-lane count from `count`; 1 << 31 shifts out to 0.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set1_epi32(1);
    let r = _mm512_sllv_epi32(a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54509
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_sllv_epi32() {
    // Mask 0 keeps `src`; all-ones mask shifts every lane by its count.
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set1_epi32(1);
    let r = _mm512_mask_sllv_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}
54520
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_sllv_epi32() {
    // Zero mask yields all zeros; low-half mask shifts the 8 low lanes
    // (lane 0's 1 << 31 shifts out to 0; the high lane's count-0 case is masked off).
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_sllv_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}
54531
    // 256-bit write-masked variable left shift (AVX512VL): mask 0 passes `src` through.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54542
    // 256-bit zero-masked variable left shift: mask 0 yields all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_sllv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
54553
    // 128-bit write-masked variable left shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54564
    // 128-bit zero-masked variable left shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_sllv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
54575
    // Per-element variable logical right shift: each 2 becomes 1; 0 stays 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_srlv_epi32(a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54584
    // Write-masked variable logical right shift: mask 0 returns `src` unchanged.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
54595
    // Zero-masked variable logical right shift: only the low 8 lanes are kept;
    // the lowest lane is 0 >> 1 == 0, so seven lanes hold 1.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srlv_epi32() {
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_maskz_srlv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
        assert_eq_m512i(r, e);
    }
54606
    // 256-bit write-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54617
    // 256-bit zero-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srlv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54628
    // 128-bit write-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54639
    // 128-bit zero-masked variable logical right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srlv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54650
    // Uniform left shift: every lane shifts by the scalar count held in the low
    // 64 bits of `count` (here 2); 1 << 31 shifted by 2 overflows to 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_sll_epi32(a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
54671
    // Write-masked uniform left shift by the low-64-bit count (2): mask 0 passes
    // `src` through; a full mask shifts every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sll_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
54694
    // Zero-masked uniform left shift. Only the low 64 bits of `count` matter, so the
    // effective shift is 2 even though element 3 is also 2. Both surviving nonzero
    // inputs (1 << 31 lanes) overflow to 0, giving an all-zero expected vector.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sll_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54711
    // 256-bit write-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sll_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54722
    // 256-bit zero-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sll_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54733
    // 128-bit write-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sll_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54744
    // 128-bit zero-masked uniform left shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sll_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54755
    // Uniform logical right shift by the low-64-bit count (2): 1 << 31 becomes
    // 1 << 29; 1 << 0 and 1 << 1 shift out to 0; 1 << 2 becomes 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_srl_epi32(a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54770
    // Write-masked uniform logical right shift by 2: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_srl_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54787
    // Zero-masked uniform logical right shift. The effective count comes from the low
    // 64 bits of `count` (2); only the low 8 lanes survive the mask, so the lowest
    // lane's 1 << 31 becomes 1 << 29 and the high half is zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_srl_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
        assert_eq_m512i(r, e);
    }
54804
    // 256-bit write-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_srl_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54815
    // 256-bit zero-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_srl_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54826
    // 128-bit write-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_srl_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54837
    // 128-bit zero-masked uniform logical right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_srl_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54848
    // Uniform arithmetic (sign-extending) right shift by the low-64-bit count (2):
    // -15 >> 2 == -4 (rounds toward -infinity), and the lowest lane 1 >> 2 == 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm_set_epi32(1, 0, 0, 2);
        let r = _mm512_sra_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
54857
    // Write-masked uniform arithmetic right shift by 2: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sra_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
        assert_eq_m512i(r, e);
    }
54868
    // Zero-masked uniform arithmetic right shift by 2 (count taken from the low
    // 64 bits): -15 >> 2 and -14 >> 2 both floor to -4; the high half is zeroed.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sra_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sra_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }
54879
    // 256-bit write-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sra_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54890
    // 256-bit zero-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sra_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sra_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54901
    // 128-bit write-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sra_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54912
    // 128-bit zero-masked uniform arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sra_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sra_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54923
    // Per-element variable arithmetic right shift: the lowest lane shifts by 0 and
    // stays 1; -15 >> 2 floors to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm512_srav_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m512i(r, e);
    }
54932
    // Write-masked per-element arithmetic right shift: mask 0 passes `src` through;
    // the lowest lane is 16 >> 1 == 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let r = _mm512_mask_srav_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
54943
    // Zero-masked per-element arithmetic right shift: only the low 8 lanes survive;
    // -15 >> 2 and -14 >> 2 both floor to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
        let r = _mm512_maskz_srav_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }
54954
    // 256-bit write-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srav_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54965
    // 256-bit zero-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srav_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
54976
    // 128-bit write-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srav_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54987
    // 128-bit zero-masked per-element arithmetic right shift (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srav_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
54998
    // Immediate arithmetic right shift by 2 (const generic): -15 >> 2 floors to -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
        let r = _mm512_srai_epi32::<2>(a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
        assert_eq_m512i(r, e);
    }
55006
    // Write-masked immediate arithmetic right shift by 2: 15 >> 2 == 3, -15 >> 2 == -4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }
55016
    // Zero-masked immediate arithmetic right shift by 2: high 8 lanes zeroed.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_maskz_srai_epi32::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }
55026
    // 256-bit write-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
55036
    // 256-bit zero-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_maskz_srai_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
55046
    // 128-bit write-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
55056
    // 128-bit zero-masked immediate arithmetic right shift by 1 (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_maskz_srai_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
55066
    // Immediate per-lane shuffle: imm 0b11_11_11_11 broadcasts index 3 of each
    // 128-bit lane, so each group of four becomes its 4th element (3, 7, 11, 15).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55078
    // Write-masked per-lane shuffle: mask 0 passes `src` through; full mask
    // broadcasts index 3 of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55092
    // Zero-masked per-lane shuffle: mask 0 zeroes everything; full mask broadcasts
    // index 3 of each 128-bit lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }
55106
    // 256-bit write-masked per-lane shuffle (AVX512VL). Note `set_ps` is high-to-low,
    // so lane index 3 picks 4. (low lane) and 0. (high lane).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }
55116
    // 256-bit zero-masked per-lane shuffle (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }
55126
    // 128-bit write-masked shuffle (AVX512VL): index 3 is element 0. with `set_ps`
    // high-to-low ordering, so the result broadcasts 0.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
55136
    // 128-bit zero-masked shuffle (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }
55146
    // Full-width index permute: every idx lane is 1, and with `set_epi32`
    // high-to-low ordering element 1 of `a` holds 14, so the result broadcasts 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutevar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55155
    // Write-masked full-width index permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55166
    // Per-128-bit-lane variable select: index 1 picks the second element of each
    // lane (14., 10., 6., 2. with `set_ps` high-to-low ordering).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_permutevar_ps(a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55179
    // Write-masked per-lane variable select: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55194
    // Zero-masked per-lane variable select: only the low 8 elements are kept.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_maskz_permutevar_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55209
    // 256-bit write-masked per-lane variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }
55220
    // 256-bit zero-masked per-lane variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_maskz_permutevar_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }
55231
    // 128-bit write-masked variable select (AVX512VL): index 1 picks 2.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }
55242
    // 128-bit zero-masked variable select (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_maskz_permutevar_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }
55253
    // Full-width cross-lane permute: idx 1 selects element 1 of `a`, which holds 14
    // with `set_epi32` high-to-low ordering.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutexvar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55262
    // Write-masked cross-lane permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }
55273
    // Zero-masked cross-lane permute: only the low 8 lanes receive 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
        assert_eq_m512i(r, e);
    }
55284
    // 256-bit cross-lane permute (AVX512VL): element 1 of `a` holds 6.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_permutexvar_epi32(idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55293
    // 256-bit write-masked cross-lane permute (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55304
    // 256-bit zero-masked cross-lane permute (AVX512VL).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }
55315
    // Cross-lane float permute: element 1 of `a` holds 14. (set_ps is high-to-low).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permutexvar_ps(idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }
55326
    // Write-masked cross-lane float permute: mask 0 passes `src` through.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }
55339
    // Zero-masked cross-lane float permute: only the low 8 lanes receive 14.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
55354
    // 256-bit cross-lane float permute (AVX512VL): element 1 of `a` holds 6.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_permutexvar_ps(idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 fn test_mm256_mask_permutexvar_ps() {
55366 let idx = _mm256_set1_epi32(1);
55367 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
55368 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
55369 assert_eq_m256(r, a);
55370 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
55371 let e = _mm256_set1_ps(6.);
55372 assert_eq_m256(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 fn test_mm256_maskz_permutexvar_ps() {
55377 let idx = _mm256_set1_epi32(1);
55378 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
55379 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
55380 assert_eq_m256(r, _mm256_setzero_ps());
55381 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
55382 let e = _mm256_set1_ps(6.);
55383 assert_eq_m256(r, e);
55384 }
55385
// `permutex2var` picks from the 32-element pool {a, b}: index bit 4 set
// (`1 << 4`) selects from `b`, otherwise from `a`. a[i] = 15 - i because
// `_mm512_set_epi32` lists the highest lane first, so idx 1 -> 14, idx 8 -> 7.
#[simd_test(enable = "avx512f")]
fn test_mm512_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_permutex2var_epi32(a, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `a` (the `src` operand) unchanged.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8 (listed last by set_epi32),
// upper 8 elements are zeroed.
#[simd_test(enable = "avx512f")]
fn test_mm512_maskz_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
    assert_eq_m512i(r, e);
}

// `mask2` variant copies from `idx` (not `src`) where the mask bit is clear,
// so the out-of-range sentinel indices (1000, 2000, ...) survive verbatim in
// the unselected upper lanes. Only the low 4 bits of an index are used for
// the actual permute.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask2_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1000, 1 << 4, 2000, 1 << 4,
        3000, 1 << 4, 4000, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m512i(r, idx);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi32(
        1000, 1 << 4, 2000, 1 << 4,
        3000, 1 << 4, 4000, 1 << 4,
        10, 100, 9, 100,
        8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

// 256-bit pool is 16 elements, so bit 3 (`1 << 3`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_permutex2var_epi32(a, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_maskz_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 256-bit `mask2`: mask 0 returns `idx` itself.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask2_permutex2var_epi32() {
    let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_epi32(100);
    let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m256i(r, idx);
    let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
    let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
    assert_eq_m256i(r, e);
}

// 128-bit pool is 8 elements, so bit 2 (`1 << 2`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_permutex2var_epi32(a, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_maskz_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}

// 128-bit `mask2`: mask 0 returns `idx` itself.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask2_permutex2var_epi32() {
    let a = _mm_set_epi32(0, 1, 2, 3);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_epi32(100);
    let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m128i(r, idx);
    let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
    let e = _mm_set_epi32(2, 100, 1, 100);
    assert_eq_m128i(r, e);
}
55557
// Float counterpart of the `permutex2var_epi32` tests above: index bit 4
// (`1 << 4`) selects from `b`; a[i] = 15.0 - i since set_ps lists the highest
// lane first, so idx 1 -> 14.0, idx 8 -> 7.0.
#[simd_test(enable = "avx512f")]
fn test_mm512_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_permutex2var_ps(a, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `a` (the `src` operand) unchanged.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
fn test_mm512_maskz_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// `mask2`: lanes with a clear mask bit copy the raw `idx` bits, reinterpreted
// as floats — hence the bit-cast comparison against `_mm512_castsi512_ps(idx)`.
#[simd_test(enable = "avx512f")]
fn test_mm512_mask2_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    #[rustfmt::skip]
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4,
        3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4,
        7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m512(r, _mm512_castsi512_ps(idx));
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

// 256-bit pool is 16 elements, so bit 3 (`1 << 3`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_permutex2var_ps(a, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_maskz_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 256-bit `mask2`: mask 0 returns `idx` bit-cast to floats.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm256_mask2_permutex2var_ps() {
    let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
    let b = _mm256_set1_ps(100.);
    let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m256(r, _mm256_castsi256_ps(idx));
    let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
    let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
    assert_eq_m256(r, e);
}

// 128-bit pool is 8 elements, so bit 2 (`1 << 2`) selects from `b`.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_permutex2var_ps(a, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit write-masked: mask 0 keeps `a`; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m128(r, a);
    let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit zero-masked: mask 0 zeroes; full mask applies the permute.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_maskz_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}

// 128-bit `mask2`: mask 0 returns `idx` bit-cast to floats.
#[simd_test(enable = "avx512f,avx512vl")]
fn test_mm_mask2_permutex2var_ps() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
    let b = _mm_set1_ps(100.);
    let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m128(r, _mm_castsi128_ps(idx));
    let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
    let e = _mm_set_ps(2., 100., 1., 100.);
    assert_eq_m128(r, e);
}
55735
// `shuffle_epi32` permutes dwords independently within each 128-bit lane
// according to the `_MM_PERM_*` immediate (here AADD: elements 3,3,0,0 of
// each lane). These tests also exercise const evaluation (`const fn`).
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8 (listed first by setr),
// zeroes the rest.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_epi32() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
    let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_epi32() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
    let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m256i(r, e);
}

// 128-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_shuffle_epi32() {
    let a = _mm_set_epi32(1, 4, 5, 8);
    let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
    let e = _mm_set_epi32(8, 8, 1, 1);
    assert_eq_m128i(r, e);
}

// 128-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_shuffle_epi32() {
    let a = _mm_set_epi32(1, 4, 5, 8);
    let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
    let e = _mm_set_epi32(8, 8, 1, 1);
    assert_eq_m128i(r, e);
}
55803
// `shuffle_ps` imm8 fields, per 128-bit lane: bits [1:0]/[3:2] pick dst lanes
// 0/1 from `a`, bits [5:4]/[7:6] pick dst lanes 2/3 from `b`.
// 0b00_00_11_11 -> a[3], a[3], b[0], b[0] in each lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit write-masked variant; the mask-0 call uses a different immediate,
// which is fine since its shuffled result is discarded entirely.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_ps() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
    let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_ps() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
    let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
    assert_eq_m256(r, e);
}

// 128-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_mask_shuffle_ps() {
    let a = _mm_set_ps(1., 4., 5., 8.);
    let b = _mm_set_ps(2., 3., 6., 7.);
    let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
    assert_eq_m128(r, a);
    let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
    let e = _mm_set_ps(7., 7., 1., 1.);
    assert_eq_m128(r, e);
}

// 128-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm_maskz_shuffle_ps() {
    let a = _mm_set_ps(1., 4., 5., 8.);
    let b = _mm_set_ps(2., 3., 6., 7.);
    let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
    let e = _mm_set_ps(7., 7., 1., 1.);
    assert_eq_m128(r, e);
}
55896
// `shuffle_i32x4` permutes whole 128-bit lanes: the low half of the result is
// lanes selected from `a`, the high half lanes selected from `b`. With imm 0,
// every selection is lane 0 of the respective source.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit form has two lanes: imm 0b00 takes lane 0 of `a` (low) and lane 0
// of `b` (high).
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_shuffle_i32x4::<0b00>(a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_i32x4() {
    let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
    let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
    assert_eq_m256i(r, e);
}
55958
// Float counterpart of `shuffle_i32x4`: permutes whole 128-bit lanes, low
// half from `a`, high half from `b`; imm 0 selects lane 0 everywhere.
#[simd_test(enable = "avx512f")]
const fn test_mm512_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask shuffles all lanes.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit form: imm 0b00 takes lane 0 of `a` (low) and lane 0 of `b` (high).
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_shuffle_f32x4::<0b00>(a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}

// 256-bit write-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
    assert_eq_m256(r, a);
    let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}

// 256-bit zero-masked variant.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_shuffle_f32x4() {
    let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
    let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
    let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
    assert_eq_m256(r, _mm256_setzero_ps());
    let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
    let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
    assert_eq_m256(r, e);
}
56038
// `extractf32x4_ps::<1>` extracts 128-bit lane 1, i.e. elements 4..8
// (values 5..=8 with this setr input).
#[simd_test(enable = "avx512f")]
const fn test_mm512_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_extractf32x4_ps::<1>(a);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

// Write-masked extract: mask 0 returns the 128-bit `src`; the mask applies
// per element of the extracted lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let src = _mm_set1_ps(100.);
    let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
    assert_eq_m128(r, src);
    let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

// Zero-masked extract: mask 0b0001 keeps only element 0 of the extracted lane.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
    let e = _mm_setr_ps(5., 0., 0., 0.);
    assert_eq_m128(r, e);
}

// 256-bit extract: lane 1 is the upper half; `_mm256_set_ps` lists the highest
// lane first, so the upper half is (1., 2., 3., 4.) in `set` order.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_extractf32x4_ps::<1>(a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}

// 256-bit write-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let src = _mm_set1_ps(100.);
    let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
    assert_eq_m128(r, src);
    let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}

// 256-bit zero-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_extractf32x4_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
    let e = _mm_set_ps(1., 2., 3., 4.);
    assert_eq_m128(r, e);
}
56102
// Integer counterpart of `extractf32x4`: lane 1 holds elements 4..8
// (values 5..=8 with this setr input).
#[simd_test(enable = "avx512f")]
const fn test_mm512_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let r = _mm512_extracti32x4_epi32::<1>(a);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}

// Write-masked extract: mask 0 returns the 128-bit `src` unchanged.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let src = _mm_set1_epi32(100);
    let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}
56121
56122 #[simd_test(enable = "avx512f,avx512vl")]
56123 const fn test_mm512_maskz_extracti32x4_epi32() {
56124 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56125 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
56126 assert_eq_m128i(r, _mm_setzero_si128());
56127 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
56128 let e = _mm_setr_epi32(5, 0, 0, 0);
56129 assert_eq_m128i(r, e);
56130 }
56131
// 256-bit integer extract: lane 1 is the upper half, i.e. (1, 2, 3, 4) in
// `set` order since `_mm256_set_epi32` lists the highest lane first.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let r = _mm256_extracti32x4_epi32::<1>(a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}

// 256-bit write-masked extract: mask 0 returns the 128-bit `src` unchanged.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let src = _mm_set1_epi32(100);
    let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}

// 256-bit zero-masked extract.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_maskz_extracti32x4_epi32() {
    let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
    let e = _mm_set_epi32(1, 2, 3, 4);
    assert_eq_m128i(r, e);
}
56160
// `moveldup_ps` duplicates each even-indexed element into the following odd
// slot: dst[2i] = dst[2i+1] = src[2i].
#[simd_test(enable = "avx512f")]
const fn test_mm512_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_moveldup_ps(a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

// Write-masked: mask 0 returns `src` (here `a`); full mask duplicates all pairs.
#[simd_test(enable = "avx512f")]
const fn test_mm512_mask_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_mask_moveldup_ps(a, 0, a);
    assert_eq_m512(r, a);
    let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

// Zero-masked: low-half mask keeps elements 0..8, zeroes the upper 8.
#[simd_test(enable = "avx512f")]
const fn test_mm512_maskz_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_moveldup_ps(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

// 256-bit write-masked variant; with `set_ps` (high lane first) the even
// memory elements are 8., 6., 4., 2., hence the expected pattern.
#[simd_test(enable = "avx512f,avx512vl")]
const fn test_mm256_mask_moveldup_ps() {
    let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
    let r = _mm256_mask_moveldup_ps(a, 0, a);
    assert_eq_m256(r, a);
    let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
    let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
    assert_eq_m256(r, e);
}
56210
56211 #[simd_test(enable = "avx512f,avx512vl")]
56212 const fn test_mm256_maskz_moveldup_ps() {
56213 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56214 let r = _mm256_maskz_moveldup_ps(0, a);
56215 assert_eq_m256(r, _mm256_setzero_ps());
56216 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
56217 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56218 assert_eq_m256(r, e);
56219 }
56220
56221 #[simd_test(enable = "avx512f,avx512vl")]
56222 const fn test_mm_mask_moveldup_ps() {
56223 let a = _mm_set_ps(1., 2., 3., 4.);
56224 let r = _mm_mask_moveldup_ps(a, 0, a);
56225 assert_eq_m128(r, a);
56226 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
56227 let e = _mm_set_ps(2., 2., 4., 4.);
56228 assert_eq_m128(r, e);
56229 }
56230
56231 #[simd_test(enable = "avx512f,avx512vl")]
56232 const fn test_mm_maskz_moveldup_ps() {
56233 let a = _mm_set_ps(1., 2., 3., 4.);
56234 let r = _mm_maskz_moveldup_ps(0, a);
56235 assert_eq_m128(r, _mm_setzero_ps());
56236 let r = _mm_maskz_moveldup_ps(0b00001111, a);
56237 let e = _mm_set_ps(2., 2., 4., 4.);
56238 assert_eq_m128(r, e);
56239 }
56240
56241 #[simd_test(enable = "avx512f")]
56242 const fn test_mm512_movehdup_ps() {
56243 let a = _mm512_setr_ps(
56244 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56245 );
56246 let r = _mm512_movehdup_ps(a);
56247 let e = _mm512_setr_ps(
56248 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56249 );
56250 assert_eq_m512(r, e);
56251 }
56252
56253 #[simd_test(enable = "avx512f")]
56254 const fn test_mm512_mask_movehdup_ps() {
56255 let a = _mm512_setr_ps(
56256 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56257 );
56258 let r = _mm512_mask_movehdup_ps(a, 0, a);
56259 assert_eq_m512(r, a);
56260 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
56261 let e = _mm512_setr_ps(
56262 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
56263 );
56264 assert_eq_m512(r, e);
56265 }
56266
56267 #[simd_test(enable = "avx512f")]
56268 const fn test_mm512_maskz_movehdup_ps() {
56269 let a = _mm512_setr_ps(
56270 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56271 );
56272 let r = _mm512_maskz_movehdup_ps(0, a);
56273 assert_eq_m512(r, _mm512_setzero_ps());
56274 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
56275 let e = _mm512_setr_ps(
56276 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56277 );
56278 assert_eq_m512(r, e);
56279 }
56280
56281 #[simd_test(enable = "avx512f,avx512vl")]
56282 const fn test_mm256_mask_movehdup_ps() {
56283 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56284 let r = _mm256_mask_movehdup_ps(a, 0, a);
56285 assert_eq_m256(r, a);
56286 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
56287 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56288 assert_eq_m256(r, e);
56289 }
56290
56291 #[simd_test(enable = "avx512f,avx512vl")]
56292 const fn test_mm256_maskz_movehdup_ps() {
56293 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56294 let r = _mm256_maskz_movehdup_ps(0, a);
56295 assert_eq_m256(r, _mm256_setzero_ps());
56296 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
56297 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
56298 assert_eq_m256(r, e);
56299 }
56300
56301 #[simd_test(enable = "avx512f,avx512vl")]
56302 const fn test_mm_mask_movehdup_ps() {
56303 let a = _mm_set_ps(1., 2., 3., 4.);
56304 let r = _mm_mask_movehdup_ps(a, 0, a);
56305 assert_eq_m128(r, a);
56306 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
56307 let e = _mm_set_ps(1., 1., 3., 3.);
56308 assert_eq_m128(r, e);
56309 }
56310
56311 #[simd_test(enable = "avx512f,avx512vl")]
56312 const fn test_mm_maskz_movehdup_ps() {
56313 let a = _mm_set_ps(1., 2., 3., 4.);
56314 let r = _mm_maskz_movehdup_ps(0, a);
56315 assert_eq_m128(r, _mm_setzero_ps());
56316 let r = _mm_maskz_movehdup_ps(0b00001111, a);
56317 let e = _mm_set_ps(1., 1., 3., 3.);
56318 assert_eq_m128(r, e);
56319 }
56320
56321 #[simd_test(enable = "avx512f")]
56322 const fn test_mm512_inserti32x4() {
56323 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56324 let b = _mm_setr_epi32(17, 18, 19, 20);
56325 let r = _mm512_inserti32x4::<0>(a, b);
56326 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56327 assert_eq_m512i(r, e);
56328 }
56329
56330 #[simd_test(enable = "avx512f")]
56331 const fn test_mm512_mask_inserti32x4() {
56332 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56333 let b = _mm_setr_epi32(17, 18, 19, 20);
56334 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
56335 assert_eq_m512i(r, a);
56336 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
56337 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56338 assert_eq_m512i(r, e);
56339 }
56340
56341 #[simd_test(enable = "avx512f")]
56342 const fn test_mm512_maskz_inserti32x4() {
56343 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56344 let b = _mm_setr_epi32(17, 18, 19, 20);
56345 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
56346 assert_eq_m512i(r, _mm512_setzero_si512());
56347 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
56348 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
56349 assert_eq_m512i(r, e);
56350 }
56351
56352 #[simd_test(enable = "avx512f,avx512vl")]
56353 const fn test_mm256_inserti32x4() {
56354 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56355 let b = _mm_set_epi32(17, 18, 19, 20);
56356 let r = _mm256_inserti32x4::<1>(a, b);
56357 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56358 assert_eq_m256i(r, e);
56359 }
56360
56361 #[simd_test(enable = "avx512f,avx512vl")]
56362 const fn test_mm256_mask_inserti32x4() {
56363 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56364 let b = _mm_set_epi32(17, 18, 19, 20);
56365 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
56366 assert_eq_m256i(r, a);
56367 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
56368 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56369 assert_eq_m256i(r, e);
56370 }
56371
56372 #[simd_test(enable = "avx512f,avx512vl")]
56373 const fn test_mm256_maskz_inserti32x4() {
56374 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56375 let b = _mm_set_epi32(17, 18, 19, 20);
56376 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
56377 assert_eq_m256i(r, _mm256_setzero_si256());
56378 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
56379 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
56380 assert_eq_m256i(r, e);
56381 }
56382
56383 #[simd_test(enable = "avx512f")]
56384 const fn test_mm512_insertf32x4() {
56385 let a = _mm512_setr_ps(
56386 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56387 );
56388 let b = _mm_setr_ps(17., 18., 19., 20.);
56389 let r = _mm512_insertf32x4::<0>(a, b);
56390 let e = _mm512_setr_ps(
56391 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56392 );
56393 assert_eq_m512(r, e);
56394 }
56395
56396 #[simd_test(enable = "avx512f")]
56397 const fn test_mm512_mask_insertf32x4() {
56398 let a = _mm512_setr_ps(
56399 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56400 );
56401 let b = _mm_setr_ps(17., 18., 19., 20.);
56402 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
56403 assert_eq_m512(r, a);
56404 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
56405 let e = _mm512_setr_ps(
56406 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56407 );
56408 assert_eq_m512(r, e);
56409 }
56410
56411 #[simd_test(enable = "avx512f")]
56412 const fn test_mm512_maskz_insertf32x4() {
56413 let a = _mm512_setr_ps(
56414 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56415 );
56416 let b = _mm_setr_ps(17., 18., 19., 20.);
56417 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
56418 assert_eq_m512(r, _mm512_setzero_ps());
56419 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
56420 let e = _mm512_setr_ps(
56421 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
56422 );
56423 assert_eq_m512(r, e);
56424 }
56425
56426 #[simd_test(enable = "avx512f,avx512vl")]
56427 const fn test_mm256_insertf32x4() {
56428 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56429 let b = _mm_set_ps(17., 18., 19., 20.);
56430 let r = _mm256_insertf32x4::<1>(a, b);
56431 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56432 assert_eq_m256(r, e);
56433 }
56434
56435 #[simd_test(enable = "avx512f,avx512vl")]
56436 const fn test_mm256_mask_insertf32x4() {
56437 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56438 let b = _mm_set_ps(17., 18., 19., 20.);
56439 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
56440 assert_eq_m256(r, a);
56441 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
56442 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56443 assert_eq_m256(r, e);
56444 }
56445
56446 #[simd_test(enable = "avx512f,avx512vl")]
56447 const fn test_mm256_maskz_insertf32x4() {
56448 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56449 let b = _mm_set_ps(17., 18., 19., 20.);
56450 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
56451 assert_eq_m256(r, _mm256_setzero_ps());
56452 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
56453 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
56454 assert_eq_m256(r, e);
56455 }
56456
56457 #[simd_test(enable = "avx512f")]
56458 const fn test_mm512_castps128_ps512() {
56459 let a = _mm_setr_ps(17., 18., 19., 20.);
56460 let r = _mm512_castps128_ps512(a);
56461 assert_eq_m128(_mm512_castps512_ps128(r), a);
56462 }
56463
56464 #[simd_test(enable = "avx512f")]
56465 const fn test_mm512_castps256_ps512() {
56466 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56467 let r = _mm512_castps256_ps512(a);
56468 assert_eq_m256(_mm512_castps512_ps256(r), a);
56469 }
56470
56471 #[simd_test(enable = "avx512f")]
56472 const fn test_mm512_zextps128_ps512() {
56473 let a = _mm_setr_ps(17., 18., 19., 20.);
56474 let r = _mm512_zextps128_ps512(a);
56475 let e = _mm512_setr_ps(
56476 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
56477 );
56478 assert_eq_m512(r, e);
56479 }
56480
56481 #[simd_test(enable = "avx512f")]
56482 const fn test_mm512_zextps256_ps512() {
56483 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56484 let r = _mm512_zextps256_ps512(a);
56485 let e = _mm512_setr_ps(
56486 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
56487 );
56488 assert_eq_m512(r, e);
56489 }
56490
56491 #[simd_test(enable = "avx512f")]
56492 const fn test_mm512_castps512_ps128() {
56493 let a = _mm512_setr_ps(
56494 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
56495 );
56496 let r = _mm512_castps512_ps128(a);
56497 let e = _mm_setr_ps(17., 18., 19., 20.);
56498 assert_eq_m128(r, e);
56499 }
56500
56501 #[simd_test(enable = "avx512f")]
56502 const fn test_mm512_castps512_ps256() {
56503 let a = _mm512_setr_ps(
56504 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
56505 );
56506 let r = _mm512_castps512_ps256(a);
56507 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
56508 assert_eq_m256(r, e);
56509 }
56510
56511 #[simd_test(enable = "avx512f")]
56512 const fn test_mm512_castps_pd() {
56513 let a = _mm512_set1_ps(1.);
56514 let r = _mm512_castps_pd(a);
56515 let e = _mm512_set1_pd(0.007812501848093234);
56516 assert_eq_m512d(r, e);
56517 }
56518
56519 #[simd_test(enable = "avx512f")]
56520 const fn test_mm512_castps_si512() {
56521 let a = _mm512_set1_ps(1.);
56522 let r = _mm512_castps_si512(a);
56523 let e = _mm512_set1_epi32(1065353216);
56524 assert_eq_m512i(r, e);
56525 }
56526
56527 #[simd_test(enable = "avx512f")]
56528 const fn test_mm512_broadcastd_epi32() {
56529 let a = _mm_set_epi32(17, 18, 19, 20);
56530 let r = _mm512_broadcastd_epi32(a);
56531 let e = _mm512_set1_epi32(20);
56532 assert_eq_m512i(r, e);
56533 }
56534
56535 #[simd_test(enable = "avx512f")]
56536 const fn test_mm512_mask_broadcastd_epi32() {
56537 let src = _mm512_set1_epi32(20);
56538 let a = _mm_set_epi32(17, 18, 19, 20);
56539 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
56540 assert_eq_m512i(r, src);
56541 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
56542 let e = _mm512_set1_epi32(20);
56543 assert_eq_m512i(r, e);
56544 }
56545
56546 #[simd_test(enable = "avx512f")]
56547 const fn test_mm512_maskz_broadcastd_epi32() {
56548 let a = _mm_set_epi32(17, 18, 19, 20);
56549 let r = _mm512_maskz_broadcastd_epi32(0, a);
56550 assert_eq_m512i(r, _mm512_setzero_si512());
56551 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
56552 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
56553 assert_eq_m512i(r, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f,avx512vl")]
56557 const fn test_mm256_mask_broadcastd_epi32() {
56558 let src = _mm256_set1_epi32(20);
56559 let a = _mm_set_epi32(17, 18, 19, 20);
56560 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
56561 assert_eq_m256i(r, src);
56562 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
56563 let e = _mm256_set1_epi32(20);
56564 assert_eq_m256i(r, e);
56565 }
56566
56567 #[simd_test(enable = "avx512f,avx512vl")]
56568 const fn test_mm256_maskz_broadcastd_epi32() {
56569 let a = _mm_set_epi32(17, 18, 19, 20);
56570 let r = _mm256_maskz_broadcastd_epi32(0, a);
56571 assert_eq_m256i(r, _mm256_setzero_si256());
56572 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
56573 let e = _mm256_set1_epi32(20);
56574 assert_eq_m256i(r, e);
56575 }
56576
56577 #[simd_test(enable = "avx512f,avx512vl")]
56578 const fn test_mm_mask_broadcastd_epi32() {
56579 let src = _mm_set1_epi32(20);
56580 let a = _mm_set_epi32(17, 18, 19, 20);
56581 let r = _mm_mask_broadcastd_epi32(src, 0, a);
56582 assert_eq_m128i(r, src);
56583 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
56584 let e = _mm_set1_epi32(20);
56585 assert_eq_m128i(r, e);
56586 }
56587
56588 #[simd_test(enable = "avx512f,avx512vl")]
56589 const fn test_mm_maskz_broadcastd_epi32() {
56590 let a = _mm_set_epi32(17, 18, 19, 20);
56591 let r = _mm_maskz_broadcastd_epi32(0, a);
56592 assert_eq_m128i(r, _mm_setzero_si128());
56593 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
56594 let e = _mm_set1_epi32(20);
56595 assert_eq_m128i(r, e);
56596 }
56597
56598 #[simd_test(enable = "avx512f")]
56599 const fn test_mm512_broadcastss_ps() {
56600 let a = _mm_set_ps(17., 18., 19., 20.);
56601 let r = _mm512_broadcastss_ps(a);
56602 let e = _mm512_set1_ps(20.);
56603 assert_eq_m512(r, e);
56604 }
56605
56606 #[simd_test(enable = "avx512f")]
56607 const fn test_mm512_mask_broadcastss_ps() {
56608 let src = _mm512_set1_ps(20.);
56609 let a = _mm_set_ps(17., 18., 19., 20.);
56610 let r = _mm512_mask_broadcastss_ps(src, 0, a);
56611 assert_eq_m512(r, src);
56612 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
56613 let e = _mm512_set1_ps(20.);
56614 assert_eq_m512(r, e);
56615 }
56616
56617 #[simd_test(enable = "avx512f")]
56618 const fn test_mm512_maskz_broadcastss_ps() {
56619 let a = _mm_set_ps(17., 18., 19., 20.);
56620 let r = _mm512_maskz_broadcastss_ps(0, a);
56621 assert_eq_m512(r, _mm512_setzero_ps());
56622 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
56623 let e = _mm512_setr_ps(
56624 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
56625 );
56626 assert_eq_m512(r, e);
56627 }
56628
56629 #[simd_test(enable = "avx512f,avx512vl")]
56630 const fn test_mm256_mask_broadcastss_ps() {
56631 let src = _mm256_set1_ps(20.);
56632 let a = _mm_set_ps(17., 18., 19., 20.);
56633 let r = _mm256_mask_broadcastss_ps(src, 0, a);
56634 assert_eq_m256(r, src);
56635 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
56636 let e = _mm256_set1_ps(20.);
56637 assert_eq_m256(r, e);
56638 }
56639
56640 #[simd_test(enable = "avx512f,avx512vl")]
56641 const fn test_mm256_maskz_broadcastss_ps() {
56642 let a = _mm_set_ps(17., 18., 19., 20.);
56643 let r = _mm256_maskz_broadcastss_ps(0, a);
56644 assert_eq_m256(r, _mm256_setzero_ps());
56645 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
56646 let e = _mm256_set1_ps(20.);
56647 assert_eq_m256(r, e);
56648 }
56649
56650 #[simd_test(enable = "avx512f,avx512vl")]
56651 const fn test_mm_mask_broadcastss_ps() {
56652 let src = _mm_set1_ps(20.);
56653 let a = _mm_set_ps(17., 18., 19., 20.);
56654 let r = _mm_mask_broadcastss_ps(src, 0, a);
56655 assert_eq_m128(r, src);
56656 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
56657 let e = _mm_set1_ps(20.);
56658 assert_eq_m128(r, e);
56659 }
56660
56661 #[simd_test(enable = "avx512f,avx512vl")]
56662 const fn test_mm_maskz_broadcastss_ps() {
56663 let a = _mm_set_ps(17., 18., 19., 20.);
56664 let r = _mm_maskz_broadcastss_ps(0, a);
56665 assert_eq_m128(r, _mm_setzero_ps());
56666 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
56667 let e = _mm_set1_ps(20.);
56668 assert_eq_m128(r, e);
56669 }
56670
56671 #[simd_test(enable = "avx512f")]
56672 const fn test_mm512_broadcast_i32x4() {
56673 let a = _mm_set_epi32(17, 18, 19, 20);
56674 let r = _mm512_broadcast_i32x4(a);
56675 let e = _mm512_set_epi32(
56676 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56677 );
56678 assert_eq_m512i(r, e);
56679 }
56680
56681 #[simd_test(enable = "avx512f")]
56682 const fn test_mm512_mask_broadcast_i32x4() {
56683 let src = _mm512_set1_epi32(20);
56684 let a = _mm_set_epi32(17, 18, 19, 20);
56685 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
56686 assert_eq_m512i(r, src);
56687 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
56688 let e = _mm512_set_epi32(
56689 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
56690 );
56691 assert_eq_m512i(r, e);
56692 }
56693
56694 #[simd_test(enable = "avx512f")]
56695 const fn test_mm512_maskz_broadcast_i32x4() {
56696 let a = _mm_set_epi32(17, 18, 19, 20);
56697 let r = _mm512_maskz_broadcast_i32x4(0, a);
56698 assert_eq_m512i(r, _mm512_setzero_si512());
56699 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
56700 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
56701 assert_eq_m512i(r, e);
56702 }
56703
56704 #[simd_test(enable = "avx512f,avx512vl")]
56705 const fn test_mm256_broadcast_i32x4() {
56706 let a = _mm_set_epi32(17, 18, 19, 20);
56707 let r = _mm256_broadcast_i32x4(a);
56708 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56709 assert_eq_m256i(r, e);
56710 }
56711
56712 #[simd_test(enable = "avx512f,avx512vl")]
56713 const fn test_mm256_mask_broadcast_i32x4() {
56714 let src = _mm256_set1_epi32(20);
56715 let a = _mm_set_epi32(17, 18, 19, 20);
56716 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
56717 assert_eq_m256i(r, src);
56718 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
56719 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56720 assert_eq_m256i(r, e);
56721 }
56722
56723 #[simd_test(enable = "avx512f,avx512vl")]
56724 const fn test_mm256_maskz_broadcast_i32x4() {
56725 let a = _mm_set_epi32(17, 18, 19, 20);
56726 let r = _mm256_maskz_broadcast_i32x4(0, a);
56727 assert_eq_m256i(r, _mm256_setzero_si256());
56728 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
56729 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
56730 assert_eq_m256i(r, e);
56731 }
56732
56733 #[simd_test(enable = "avx512f")]
56734 const fn test_mm512_broadcast_f32x4() {
56735 let a = _mm_set_ps(17., 18., 19., 20.);
56736 let r = _mm512_broadcast_f32x4(a);
56737 let e = _mm512_set_ps(
56738 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56739 );
56740 assert_eq_m512(r, e);
56741 }
56742
56743 #[simd_test(enable = "avx512f")]
56744 const fn test_mm512_mask_broadcast_f32x4() {
56745 let src = _mm512_set1_ps(20.);
56746 let a = _mm_set_ps(17., 18., 19., 20.);
56747 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
56748 assert_eq_m512(r, src);
56749 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
56750 let e = _mm512_set_ps(
56751 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
56752 );
56753 assert_eq_m512(r, e);
56754 }
56755
56756 #[simd_test(enable = "avx512f")]
56757 const fn test_mm512_maskz_broadcast_f32x4() {
56758 let a = _mm_set_ps(17., 18., 19., 20.);
56759 let r = _mm512_maskz_broadcast_f32x4(0, a);
56760 assert_eq_m512(r, _mm512_setzero_ps());
56761 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
56762 let e = _mm512_set_ps(
56763 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
56764 );
56765 assert_eq_m512(r, e);
56766 }
56767
56768 #[simd_test(enable = "avx512f,avx512vl")]
56769 const fn test_mm256_broadcast_f32x4() {
56770 let a = _mm_set_ps(17., 18., 19., 20.);
56771 let r = _mm256_broadcast_f32x4(a);
56772 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56773 assert_eq_m256(r, e);
56774 }
56775
56776 #[simd_test(enable = "avx512f,avx512vl")]
56777 const fn test_mm256_mask_broadcast_f32x4() {
56778 let src = _mm256_set1_ps(20.);
56779 let a = _mm_set_ps(17., 18., 19., 20.);
56780 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
56781 assert_eq_m256(r, src);
56782 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
56783 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56784 assert_eq_m256(r, e);
56785 }
56786
56787 #[simd_test(enable = "avx512f,avx512vl")]
56788 const fn test_mm256_maskz_broadcast_f32x4() {
56789 let a = _mm_set_ps(17., 18., 19., 20.);
56790 let r = _mm256_maskz_broadcast_f32x4(0, a);
56791 assert_eq_m256(r, _mm256_setzero_ps());
56792 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
56793 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
56794 assert_eq_m256(r, e);
56795 }
56796
56797 #[simd_test(enable = "avx512f")]
56798 const fn test_mm512_mask_blend_epi32() {
56799 let a = _mm512_set1_epi32(1);
56800 let b = _mm512_set1_epi32(2);
56801 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
56802 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
56803 assert_eq_m512i(r, e);
56804 }
56805
56806 #[simd_test(enable = "avx512f,avx512vl")]
56807 const fn test_mm256_mask_blend_epi32() {
56808 let a = _mm256_set1_epi32(1);
56809 let b = _mm256_set1_epi32(2);
56810 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
56811 let e = _mm256_set1_epi32(2);
56812 assert_eq_m256i(r, e);
56813 }
56814
56815 #[simd_test(enable = "avx512f,avx512vl")]
56816 const fn test_mm_mask_blend_epi32() {
56817 let a = _mm_set1_epi32(1);
56818 let b = _mm_set1_epi32(2);
56819 let r = _mm_mask_blend_epi32(0b00001111, a, b);
56820 let e = _mm_set1_epi32(2);
56821 assert_eq_m128i(r, e);
56822 }
56823
56824 #[simd_test(enable = "avx512f")]
56825 const fn test_mm512_mask_blend_ps() {
56826 let a = _mm512_set1_ps(1.);
56827 let b = _mm512_set1_ps(2.);
56828 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
56829 let e = _mm512_set_ps(
56830 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
56831 );
56832 assert_eq_m512(r, e);
56833 }
56834
56835 #[simd_test(enable = "avx512f,avx512vl")]
56836 const fn test_mm256_mask_blend_ps() {
56837 let a = _mm256_set1_ps(1.);
56838 let b = _mm256_set1_ps(2.);
56839 let r = _mm256_mask_blend_ps(0b11111111, a, b);
56840 let e = _mm256_set1_ps(2.);
56841 assert_eq_m256(r, e);
56842 }
56843
56844 #[simd_test(enable = "avx512f,avx512vl")]
56845 const fn test_mm_mask_blend_ps() {
56846 let a = _mm_set1_ps(1.);
56847 let b = _mm_set1_ps(2.);
56848 let r = _mm_mask_blend_ps(0b00001111, a, b);
56849 let e = _mm_set1_ps(2.);
56850 assert_eq_m128(r, e);
56851 }
56852
56853 #[simd_test(enable = "avx512f")]
56854 const fn test_mm512_unpackhi_epi32() {
56855 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56856 let b = _mm512_set_epi32(
56857 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56858 );
56859 let r = _mm512_unpackhi_epi32(a, b);
56860 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
56861 assert_eq_m512i(r, e);
56862 }
56863
56864 #[simd_test(enable = "avx512f")]
56865 const fn test_mm512_mask_unpackhi_epi32() {
56866 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56867 let b = _mm512_set_epi32(
56868 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56869 );
56870 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
56871 assert_eq_m512i(r, a);
56872 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
56873 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
56874 assert_eq_m512i(r, e);
56875 }
56876
56877 #[simd_test(enable = "avx512f")]
56878 const fn test_mm512_maskz_unpackhi_epi32() {
56879 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56880 let b = _mm512_set_epi32(
56881 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
56882 );
56883 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
56884 assert_eq_m512i(r, _mm512_setzero_si512());
56885 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
56886 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
56887 assert_eq_m512i(r, e);
56888 }
56889
56890 #[simd_test(enable = "avx512f,avx512vl")]
56891 const fn test_mm256_mask_unpackhi_epi32() {
56892 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56893 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
56894 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
56895 assert_eq_m256i(r, a);
56896 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
56897 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
56898 assert_eq_m256i(r, e);
56899 }
56900
56901 #[simd_test(enable = "avx512f,avx512vl")]
56902 const fn test_mm256_maskz_unpackhi_epi32() {
56903 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56904 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
56905 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
56906 assert_eq_m256i(r, _mm256_setzero_si256());
56907 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
56908 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
56909 assert_eq_m256i(r, e);
56910 }
56911
56912 #[simd_test(enable = "avx512f,avx512vl")]
56913 const fn test_mm_mask_unpackhi_epi32() {
56914 let a = _mm_set_epi32(1, 2, 3, 4);
56915 let b = _mm_set_epi32(17, 18, 19, 20);
56916 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
56917 assert_eq_m128i(r, a);
56918 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
56919 let e = _mm_set_epi32(17, 1, 18, 2);
56920 assert_eq_m128i(r, e);
56921 }
56922
56923 #[simd_test(enable = "avx512f,avx512vl")]
56924 const fn test_mm_maskz_unpackhi_epi32() {
56925 let a = _mm_set_epi32(1, 2, 3, 4);
56926 let b = _mm_set_epi32(17, 18, 19, 20);
56927 let r = _mm_maskz_unpackhi_epi32(0, a, b);
56928 assert_eq_m128i(r, _mm_setzero_si128());
56929 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
56930 let e = _mm_set_epi32(17, 1, 18, 2);
56931 assert_eq_m128i(r, e);
56932 }
56933
56934 #[simd_test(enable = "avx512f")]
56935 const fn test_mm512_unpackhi_ps() {
56936 let a = _mm512_set_ps(
56937 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56938 );
56939 let b = _mm512_set_ps(
56940 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
56941 );
56942 let r = _mm512_unpackhi_ps(a, b);
56943 let e = _mm512_set_ps(
56944 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
56945 );
56946 assert_eq_m512(r, e);
56947 }
56948
56949 #[simd_test(enable = "avx512f")]
56950 const fn test_mm512_mask_unpackhi_ps() {
56951 let a = _mm512_set_ps(
56952 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56953 );
56954 let b = _mm512_set_ps(
56955 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
56956 );
56957 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
56958 assert_eq_m512(r, a);
56959 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
56960 let e = _mm512_set_ps(
56961 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
56962 );
56963 assert_eq_m512(r, e);
56964 }
56965
    // Checks _mm512_maskz_unpackhi_ps: a zero mask must produce all zeros, and a
    // low-half mask (0b00000000_11111111) must keep the interleave-high result in
    // the low 8 lanes while zeroing the upper 8. Note _mm512_set_ps lists lanes
    // highest-first, so the trailing arguments are the low (mask-selected) lanes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpackhi_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    // Checks the AVX512VL 256-bit masked variant: mask 0 copies `src` (here `a`)
    // unchanged; an all-ones mask yields the full unpackhi interleave of b/a pairs
    // within each 128-bit half.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // Checks the AVX512VL 256-bit zeroing variant: mask 0 gives all zeros; an
    // all-ones mask gives the plain unpackhi result.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpackhi_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    // Checks the AVX512VL 128-bit masked variant (4 lanes, mask 0b00001111 = all
    // lanes active): interleaves the high pair of b and a.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    // Checks the AVX512VL 128-bit zeroing variant: mask 0 zeros everything; a
    // full mask reproduces the plain unpackhi result.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpackhi_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }
57026
    // Checks _mm512_unpacklo_epi32 (vpunpckldq): within each 128-bit lane, the
    // low two 32-bit elements of `b` and `a` are interleaved. With
    // _mm512_set_epi32 listing lanes highest-first, the expected interleave is
    // (b, a) pairs: 19,3, 20,4, ... per lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpacklo_epi32(a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Checks the merge-masking variant: mask 0 returns `src` (`a`) untouched; an
    // all-ones 16-bit mask returns the full unpacklo result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // Checks the zero-masking variant: mask 0 yields all zeros; a low-half mask
    // keeps the low 8 lanes of the unpacklo result and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking variant of the same interleave.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking variant; 0b00001111 activates all 4 lanes.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }
57107
    // Checks _mm512_unpacklo_ps (vunpcklps): per 128-bit lane, interleaves the
    // low two floats of `b` and `a` — the float analogue of unpacklo_epi32 with
    // the same (b, a) pair ordering in the expected vector.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpacklo_ps(a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Merge-masking variant: mask 0 returns `src` (`a`); all-ones mask returns
    // the full interleave.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masking variant: mask 0 is all zeros; a low-half mask keeps the low 8
    // result lanes (the trailing set_ps arguments) and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpacklo_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    // 256-bit (AVX512VL) merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 256-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpacklo_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    // 128-bit (AVX512VL) merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    // 128-bit (AVX512VL) zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpacklo_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }
57200
    // Checks _mm512_alignr_epi32 (valignd): concatenates a:b into a 32-element
    // pool and right-shifts by IMM8 elements. Shift 0 returns `b` unchanged;
    // shift 16 (element count of one vector) wraps back to `b` as well; shift 1
    // drops b's lowest element and pulls in a's lowest (value 1) at the top.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32::<0>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<16>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<1>(a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking variant: mask 0 returns `src` (`a`); full mask returns the
    // shift-by-1 result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking variant: mask 0 gives all zeros; a low-half mask keeps only
    // the low 8 lanes of the shift-by-1 result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) variant: an 8+8 element pool, same shift semantics.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) variant: a 4+4 element pool.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking variant.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }
57311
    // Checks _mm512_and_epi32: lanewise AND. Only the top and bottom lanes carry
    // bits; (1<<1|1<<2) & (1<<1) = 1<<1 and (1<<1|1<<3) & (1<<3|1<<4) = 1<<3.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // Merge-masking AND: mask 0 returns `src` (`a`). The 0b01111111_11111111 mask
    // leaves the top lane (bit 15 clear) copied from `a`, so the expected top
    // lane is a's 1<<1|1<<2 rather than the AND result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking AND: mask 0 is all zeros; a low-half mask keeps only the low
    // 8 lanes of the AND, so the top lane result is dropped to 0.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking AND on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_and_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_and_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_and_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking AND.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_and_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_and_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_and_si512 — same operands and
    // expected result as test_mm512_and_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }
57449
    // Checks _mm512_or_epi32: lanewise OR. Top lane: (1<<1|1<<2)|(1<<1) stays
    // 1<<1|1<<2; bottom lane: (1<<1|1<<3)|(1<<3|1<<4) = 1<<1|1<<3|1<<4.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking OR: mask 0 returns `src` (`a`); full mask gives the OR of
    // every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking OR: mask 0 zeros everything; a low-half mask zeros the top
    // lane (bit 15 clear) so only the bottom lane's OR survives.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) plain OR on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_or_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_or_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_or_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) plain OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_or_epi32(a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_or_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking OR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_or_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_or_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_or_si512 — same operands and
    // expected result as test_mm512_or_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57623
    // Checks _mm512_xor_epi32: lanewise XOR. Top lane: (1<<1|1<<2)^(1<<1) = 1<<2;
    // bottom lane: (1<<1|1<<3)^(1<<3|1<<4) = 1<<1|1<<4 (the shared 1<<3 cancels).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masking XOR: mask 0 returns `src` (`a`). With bit 15 clear in
    // 0b01111111_11111111, the top lane is copied from `a` (1<<1|1<<2) instead
    // of taking the XOR result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masking XOR: mask 0 zeros everything; a low-half mask keeps only the
    // bottom lane's XOR result.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_xor_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) plain XOR on splatted operands.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_xor_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit merge-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_xor_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 256-bit zero-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_xor_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_xor_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 2);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) plain XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_xor_epi32(a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit merge-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_xor_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // 128-bit zero-masking XOR.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_xor_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_xor_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 2);
        assert_eq_m128i(r, e);
    }

    // Checks the whole-register alias _mm512_xor_si512 — same operands and
    // expected result as test_mm512_xor_epi32.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_xor_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
57791
    // Checks _mm512_andnot_epi32: computes (!a) & b. With a = 0, !a is all ones,
    // so the result is simply b.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_andnot_epi32() {
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Merge-masking ANDNOT: mask 0 returns `src` (`a`). With disjoint bit sets
    // (a has bits 1,2; b has bits 3,4), (!a) & b = b in every active lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    // Zero-masking ANDNOT: mask 0 zeros everything; a low-half mask keeps
    // (!a) & b = 1<<3|1<<4 in the low 8 lanes and zeros the upper 8.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    // 256-bit (AVX512VL) merge-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 256-bit (AVX512VL) zero-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_andnot_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm256_maskz_andnot_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m256i(r, e);
    }

    // 128-bit (AVX512VL) merge-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }

    // 128-bit (AVX512VL) zero-masking ANDNOT.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_andnot_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm_maskz_andnot_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m128i(r, e);
    }
57872
57873 #[simd_test(enable = "avx512f")]
57874 const fn test_cvtmask16_u32() {
57875 let a: __mmask16 = 0b11001100_00110011;
57876 let r = _cvtmask16_u32(a);
57877 let e: u32 = 0b11001100_00110011;
57878 assert_eq!(r, e);
57879 }
57880
57881 #[simd_test(enable = "avx512f")]
57882 const fn test_cvtu32_mask16() {
57883 let a: u32 = 0b11001100_00110011;
57884 let r = _cvtu32_mask16(a);
57885 let e: __mmask16 = 0b11001100_00110011;
57886 assert_eq!(r, e);
57887 }
57888
57889 #[simd_test(enable = "avx512f")]
57890 const fn test_mm512_kand() {
57891 let a: u16 = 0b11001100_00110011;
57892 let b: u16 = 0b11001100_00110011;
57893 let r = _mm512_kand(a, b);
57894 let e: u16 = 0b11001100_00110011;
57895 assert_eq!(r, e);
57896 }
57897
57898 #[simd_test(enable = "avx512f")]
57899 const fn test_kand_mask16() {
57900 let a: u16 = 0b11001100_00110011;
57901 let b: u16 = 0b11001100_00110011;
57902 let r = _kand_mask16(a, b);
57903 let e: u16 = 0b11001100_00110011;
57904 assert_eq!(r, e);
57905 }
57906
57907 #[simd_test(enable = "avx512f")]
57908 const fn test_mm512_kor() {
57909 let a: u16 = 0b11001100_00110011;
57910 let b: u16 = 0b00101110_00001011;
57911 let r = _mm512_kor(a, b);
57912 let e: u16 = 0b11101110_00111011;
57913 assert_eq!(r, e);
57914 }
57915
57916 #[simd_test(enable = "avx512f")]
57917 const fn test_kor_mask16() {
57918 let a: u16 = 0b11001100_00110011;
57919 let b: u16 = 0b00101110_00001011;
57920 let r = _kor_mask16(a, b);
57921 let e: u16 = 0b11101110_00111011;
57922 assert_eq!(r, e);
57923 }
57924
57925 #[simd_test(enable = "avx512f")]
57926 const fn test_mm512_kxor() {
57927 let a: u16 = 0b11001100_00110011;
57928 let b: u16 = 0b00101110_00001011;
57929 let r = _mm512_kxor(a, b);
57930 let e: u16 = 0b11100010_00111000;
57931 assert_eq!(r, e);
57932 }
57933
57934 #[simd_test(enable = "avx512f")]
57935 const fn test_kxor_mask16() {
57936 let a: u16 = 0b11001100_00110011;
57937 let b: u16 = 0b00101110_00001011;
57938 let r = _kxor_mask16(a, b);
57939 let e: u16 = 0b11100010_00111000;
57940 assert_eq!(r, e);
57941 }
57942
57943 #[simd_test(enable = "avx512f")]
57944 const fn test_mm512_knot() {
57945 let a: u16 = 0b11001100_00110011;
57946 let r = _mm512_knot(a);
57947 let e: u16 = 0b00110011_11001100;
57948 assert_eq!(r, e);
57949 }
57950
57951 #[simd_test(enable = "avx512f")]
57952 const fn test_knot_mask16() {
57953 let a: u16 = 0b11001100_00110011;
57954 let r = _knot_mask16(a);
57955 let e: u16 = 0b00110011_11001100;
57956 assert_eq!(r, e);
57957 }
57958
57959 #[simd_test(enable = "avx512f")]
57960 const fn test_mm512_kandn() {
57961 let a: u16 = 0b11001100_00110011;
57962 let b: u16 = 0b00101110_00001011;
57963 let r = _mm512_kandn(a, b);
57964 let e: u16 = 0b00100010_00001000;
57965 assert_eq!(r, e);
57966 }
57967
57968 #[simd_test(enable = "avx512f")]
57969 const fn test_kandn_mask16() {
57970 let a: u16 = 0b11001100_00110011;
57971 let b: u16 = 0b00101110_00001011;
57972 let r = _kandn_mask16(a, b);
57973 let e: u16 = 0b00100010_00001000;
57974 assert_eq!(r, e);
57975 }
57976
57977 #[simd_test(enable = "avx512f")]
57978 const fn test_mm512_kxnor() {
57979 let a: u16 = 0b11001100_00110011;
57980 let b: u16 = 0b00101110_00001011;
57981 let r = _mm512_kxnor(a, b);
57982 let e: u16 = 0b00011101_11000111;
57983 assert_eq!(r, e);
57984 }
57985
57986 #[simd_test(enable = "avx512f")]
57987 const fn test_kxnor_mask16() {
57988 let a: u16 = 0b11001100_00110011;
57989 let b: u16 = 0b00101110_00001011;
57990 let r = _kxnor_mask16(a, b);
57991 let e: u16 = 0b00011101_11000111;
57992 assert_eq!(r, e);
57993 }
57994
#[simd_test(enable = "avx512f")]
const fn test_kortest_mask16_u8() {
    // `a | b` is all ones here, so the carry-flag result (written through
    // `all_ones`) is 1 while the zero-flag result (the return value) is 0.
    let a: __mmask16 = 0b0110100101101001;
    let b: __mmask16 = 0b1011011010110110;
    let mut all_ones: u8 = 0;
    let r = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
    assert_eq!(r, 0);
    assert_eq!(all_ones, 1);
}
58004
58005 #[simd_test(enable = "avx512f")]
58006 const fn test_kortestc_mask16_u8() {
58007 let a: __mmask16 = 0b0110100101101001;
58008 let b: __mmask16 = 0b1011011010110110;
58009 let r = _kortestc_mask16_u8(a, b);
58010 assert_eq!(r, 1);
58011 }
58012
58013 #[simd_test(enable = "avx512f")]
58014 const fn test_kortestz_mask16_u8() {
58015 let a: __mmask16 = 0b0110100101101001;
58016 let b: __mmask16 = 0b1011011010110110;
58017 let r = _kortestz_mask16_u8(a, b);
58018 assert_eq!(r, 0);
58019 }
58020
#[simd_test(enable = "avx512f")]
const fn test_kshiftli_mask16() {
    // Left-shift of a 16-bit mask by an immediate count.
    let a: __mmask16 = 0b1001011011000011;
    let r = _kshiftli_mask16::<3>(a);
    let e: __mmask16 = 0b1011011000011000;
    assert_eq!(r, e);

    // Shifting by 15 keeps only the original bit 0 (now the top bit).
    let r = _kshiftli_mask16::<15>(a);
    let e: __mmask16 = 0b1000000000000000;
    assert_eq!(r, e);

    // Counts >= the mask width must saturate to zero, not wrap.
    let r = _kshiftli_mask16::<16>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);

    // Out-of-range counts are likewise defined to produce zero.
    let r = _kshiftli_mask16::<17>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);
}
58040
#[simd_test(enable = "avx512f")]
const fn test_kshiftri_mask16() {
    // Logical right-shift of a 16-bit mask by an immediate count.
    let a: __mmask16 = 0b1010100100111100;
    let r = _kshiftri_mask16::<3>(a);
    let e: __mmask16 = 0b0001010100100111;
    assert_eq!(r, e);

    // Shifting by 15 keeps only the original top bit (now bit 0).
    let r = _kshiftri_mask16::<15>(a);
    let e: __mmask16 = 0b0000000000000001;
    assert_eq!(r, e);

    // Counts >= the mask width must saturate to zero, not wrap.
    let r = _kshiftri_mask16::<16>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);

    // Out-of-range counts are likewise defined to produce zero.
    let r = _kshiftri_mask16::<17>(a);
    let e: __mmask16 = 0b0000000000000000;
    assert_eq!(r, e);
}
58060
58061 #[simd_test(enable = "avx512f")]
58062 const fn test_load_mask16() {
58063 let a: __mmask16 = 0b1001011011000011;
58064 let r = unsafe { _load_mask16(&a) };
58065 let e: __mmask16 = 0b1001011011000011;
58066 assert_eq!(r, e);
58067 }
58068
58069 #[simd_test(enable = "avx512f")]
58070 const fn test_store_mask16() {
58071 let a: __mmask16 = 0b0110100100111100;
58072 let mut r = 0;
58073 unsafe {
58074 _store_mask16(&mut r, a);
58075 }
58076 let e: __mmask16 = 0b0110100100111100;
58077 assert_eq!(r, e);
58078 }
58079
58080 #[simd_test(enable = "avx512f")]
58081 const fn test_mm512_kmov() {
58082 let a: u16 = 0b11001100_00110011;
58083 let r = _mm512_kmov(a);
58084 let e: u16 = 0b11001100_00110011;
58085 assert_eq!(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 const fn test_mm512_int2mask() {
58090 let a: i32 = 0b11001100_00110011;
58091 let r = _mm512_int2mask(a);
58092 let e: u16 = 0b11001100_00110011;
58093 assert_eq!(r, e);
58094 }
58095
58096 #[simd_test(enable = "avx512f")]
58097 const fn test_mm512_mask2int() {
58098 let k1: __mmask16 = 0b11001100_00110011;
58099 let r = _mm512_mask2int(k1);
58100 let e: i32 = 0b11001100_00110011;
58101 assert_eq!(r, e);
58102 }
58103
58104 #[simd_test(enable = "avx512f")]
58105 const fn test_mm512_kunpackb() {
58106 let a: u16 = 0b11001100_00110011;
58107 let b: u16 = 0b00101110_00001011;
58108 let r = _mm512_kunpackb(a, b);
58109 let e: u16 = 0b00110011_00001011;
58110 assert_eq!(r, e);
58111 }
58112
58113 #[simd_test(enable = "avx512f")]
58114 const fn test_mm512_kortestc() {
58115 let a: u16 = 0b11001100_00110011;
58116 let b: u16 = 0b00101110_00001011;
58117 let r = _mm512_kortestc(a, b);
58118 assert_eq!(r, 0);
58119 let b: u16 = 0b11111111_11111111;
58120 let r = _mm512_kortestc(a, b);
58121 assert_eq!(r, 1);
58122 }
58123
58124 #[simd_test(enable = "avx512f")]
58125 const fn test_mm512_kortestz() {
58126 let a: u16 = 0b11001100_00110011;
58127 let b: u16 = 0b00101110_00001011;
58128 let r = _mm512_kortestz(a, b);
58129 assert_eq!(r, 0);
58130 let r = _mm512_kortestz(0, 0);
58131 assert_eq!(r, 1);
58132 }
58133
58134 #[simd_test(enable = "avx512f")]
58135 const fn test_mm512_test_epi32_mask() {
58136 let a = _mm512_set1_epi32(1 << 0);
58137 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58138 let r = _mm512_test_epi32_mask(a, b);
58139 let e: __mmask16 = 0b11111111_11111111;
58140 assert_eq!(r, e);
58141 }
58142
58143 #[simd_test(enable = "avx512f")]
58144 const fn test_mm512_mask_test_epi32_mask() {
58145 let a = _mm512_set1_epi32(1 << 0);
58146 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58147 let r = _mm512_mask_test_epi32_mask(0, a, b);
58148 assert_eq!(r, 0);
58149 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
58150 let e: __mmask16 = 0b11111111_11111111;
58151 assert_eq!(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f,avx512vl")]
58155 const fn test_mm256_test_epi32_mask() {
58156 let a = _mm256_set1_epi32(1 << 0);
58157 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
58158 let r = _mm256_test_epi32_mask(a, b);
58159 let e: __mmask8 = 0b11111111;
58160 assert_eq!(r, e);
58161 }
58162
58163 #[simd_test(enable = "avx512f,avx512vl")]
58164 const fn test_mm256_mask_test_epi32_mask() {
58165 let a = _mm256_set1_epi32(1 << 0);
58166 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
58167 let r = _mm256_mask_test_epi32_mask(0, a, b);
58168 assert_eq!(r, 0);
58169 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
58170 let e: __mmask8 = 0b11111111;
58171 assert_eq!(r, e);
58172 }
58173
58174 #[simd_test(enable = "avx512f,avx512vl")]
58175 const fn test_mm_test_epi32_mask() {
58176 let a = _mm_set1_epi32(1 << 0);
58177 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
58178 let r = _mm_test_epi32_mask(a, b);
58179 let e: __mmask8 = 0b00001111;
58180 assert_eq!(r, e);
58181 }
58182
58183 #[simd_test(enable = "avx512f,avx512vl")]
58184 const fn test_mm_mask_test_epi32_mask() {
58185 let a = _mm_set1_epi32(1 << 0);
58186 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
58187 let r = _mm_mask_test_epi32_mask(0, a, b);
58188 assert_eq!(r, 0);
58189 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
58190 let e: __mmask8 = 0b00001111;
58191 assert_eq!(r, e);
58192 }
58193
58194 #[simd_test(enable = "avx512f")]
58195 const fn test_mm512_testn_epi32_mask() {
58196 let a = _mm512_set1_epi32(1 << 0);
58197 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
58198 let r = _mm512_testn_epi32_mask(a, b);
58199 let e: __mmask16 = 0b00000000_00000000;
58200 assert_eq!(r, e);
58201 }
58202
58203 #[simd_test(enable = "avx512f")]
58204 const fn test_mm512_mask_testn_epi32_mask() {
58205 let a = _mm512_set1_epi32(1 << 0);
58206 let b = _mm512_set1_epi32(1 << 1);
58207 let r = _mm512_mask_test_epi32_mask(0, a, b);
58208 assert_eq!(r, 0);
58209 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
58210 let e: __mmask16 = 0b11111111_11111111;
58211 assert_eq!(r, e);
58212 }
58213
58214 #[simd_test(enable = "avx512f,avx512vl")]
58215 const fn test_mm256_testn_epi32_mask() {
58216 let a = _mm256_set1_epi32(1 << 0);
58217 let b = _mm256_set1_epi32(1 << 1);
58218 let r = _mm256_testn_epi32_mask(a, b);
58219 let e: __mmask8 = 0b11111111;
58220 assert_eq!(r, e);
58221 }
58222
58223 #[simd_test(enable = "avx512f,avx512vl")]
58224 const fn test_mm256_mask_testn_epi32_mask() {
58225 let a = _mm256_set1_epi32(1 << 0);
58226 let b = _mm256_set1_epi32(1 << 1);
58227 let r = _mm256_mask_test_epi32_mask(0, a, b);
58228 assert_eq!(r, 0);
58229 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
58230 let e: __mmask8 = 0b11111111;
58231 assert_eq!(r, e);
58232 }
58233
58234 #[simd_test(enable = "avx512f,avx512vl")]
58235 const fn test_mm_testn_epi32_mask() {
58236 let a = _mm_set1_epi32(1 << 0);
58237 let b = _mm_set1_epi32(1 << 1);
58238 let r = _mm_testn_epi32_mask(a, b);
58239 let e: __mmask8 = 0b00001111;
58240 assert_eq!(r, e);
58241 }
58242
58243 #[simd_test(enable = "avx512f,avx512vl")]
58244 const fn test_mm_mask_testn_epi32_mask() {
58245 let a = _mm_set1_epi32(1 << 0);
58246 let b = _mm_set1_epi32(1 << 1);
58247 let r = _mm_mask_test_epi32_mask(0, a, b);
58248 assert_eq!(r, 0);
58249 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
58250 let e: __mmask8 = 0b00001111;
58251 assert_eq!(r, e);
58252 }
58253
58254 #[simd_test(enable = "avx512f")]
58255 #[cfg_attr(miri, ignore)]
58256 fn test_mm512_stream_ps() {
58257 #[repr(align(64))]
58258 struct Memory {
58259 pub data: [f32; 16], // 64 bytes
58260 }
58261 let a = _mm512_set1_ps(7.0);
58262 let mut mem = Memory { data: [-1.0; 16] };
58263
58264 unsafe {
58265 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
58266 }
58267 _mm_sfence();
58268 for i in 0..16 {
58269 assert_eq!(mem.data[i], get_m512(a, i));
58270 }
58271 }
58272
58273 #[simd_test(enable = "avx512f")]
58274 #[cfg_attr(miri, ignore)]
58275 fn test_mm512_stream_pd() {
58276 #[repr(align(64))]
58277 struct Memory {
58278 pub data: [f64; 8],
58279 }
58280 let a = _mm512_set1_pd(7.0);
58281 let mut mem = Memory { data: [-1.0; 8] };
58282
58283 unsafe {
58284 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
58285 }
58286 _mm_sfence();
58287 for i in 0..8 {
58288 assert_eq!(mem.data[i], get_m512d(a, i));
58289 }
58290 }
58291
58292 #[simd_test(enable = "avx512f")]
58293 #[cfg_attr(miri, ignore)]
58294 fn test_mm512_stream_si512() {
58295 #[repr(align(64))]
58296 struct Memory {
58297 pub data: [i64; 8],
58298 }
58299 let a = _mm512_set1_epi32(7);
58300 let mut mem = Memory { data: [-1; 8] };
58301
58302 unsafe {
58303 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
58304 }
58305 _mm_sfence();
58306 for i in 0..8 {
58307 assert_eq!(mem.data[i], get_m512i(a, i));
58308 }
58309 }
58310
58311 #[simd_test(enable = "avx512f")]
58312 fn test_mm512_stream_load_si512() {
58313 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
58314 let r = unsafe { _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _) };
58315 assert_eq_m512i(a, r);
58316 }
58317
58318 #[simd_test(enable = "avx512f")]
58319 const fn test_mm512_reduce_add_epi32() {
58320 let a = _mm512_set1_epi32(1);
58321 let e: i32 = _mm512_reduce_add_epi32(a);
58322 assert_eq!(16, e);
58323 }
58324
58325 #[simd_test(enable = "avx512f")]
58326 const fn test_mm512_mask_reduce_add_epi32() {
58327 let a = _mm512_set1_epi32(1);
58328 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
58329 assert_eq!(8, e);
58330 }
58331
58332 #[simd_test(enable = "avx512f")]
58333 const fn test_mm512_reduce_add_ps() {
58334 let a = _mm512_set1_ps(1.);
58335 let e: f32 = _mm512_reduce_add_ps(a);
58336 assert_eq!(16., e);
58337 }
58338
58339 #[simd_test(enable = "avx512f")]
58340 const fn test_mm512_mask_reduce_add_ps() {
58341 let a = _mm512_set1_ps(1.);
58342 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
58343 assert_eq!(8., e);
58344 }
58345
58346 #[simd_test(enable = "avx512f")]
58347 const fn test_mm512_reduce_mul_epi32() {
58348 let a = _mm512_set1_epi32(2);
58349 let e: i32 = _mm512_reduce_mul_epi32(a);
58350 assert_eq!(65536, e);
58351 }
58352
58353 #[simd_test(enable = "avx512f")]
58354 const fn test_mm512_mask_reduce_mul_epi32() {
58355 let a = _mm512_set1_epi32(2);
58356 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
58357 assert_eq!(256, e);
58358 }
58359
58360 #[simd_test(enable = "avx512f")]
58361 const fn test_mm512_reduce_mul_ps() {
58362 let a = _mm512_set1_ps(2.);
58363 let e: f32 = _mm512_reduce_mul_ps(a);
58364 assert_eq!(65536., e);
58365 }
58366
58367 #[simd_test(enable = "avx512f")]
58368 const fn test_mm512_mask_reduce_mul_ps() {
58369 let a = _mm512_set1_ps(2.);
58370 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
58371 assert_eq!(256., e);
58372 }
58373
58374 #[simd_test(enable = "avx512f")]
58375 const fn test_mm512_reduce_max_epi32() {
58376 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58377 let e: i32 = _mm512_reduce_max_epi32(a);
58378 assert_eq!(15, e);
58379 }
58380
58381 #[simd_test(enable = "avx512f")]
58382 const fn test_mm512_mask_reduce_max_epi32() {
58383 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58384 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
58385 assert_eq!(7, e);
58386 }
58387
58388 #[simd_test(enable = "avx512f")]
58389 const fn test_mm512_reduce_max_epu32() {
58390 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58391 let e: u32 = _mm512_reduce_max_epu32(a);
58392 assert_eq!(15, e);
58393 }
58394
58395 #[simd_test(enable = "avx512f")]
58396 const fn test_mm512_mask_reduce_max_epu32() {
58397 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58398 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
58399 assert_eq!(7, e);
58400 }
58401
58402 #[simd_test(enable = "avx512f")]
58403 fn test_mm512_reduce_max_ps() {
58404 let a = _mm512_set_ps(
58405 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58406 );
58407 let e: f32 = _mm512_reduce_max_ps(a);
58408 assert_eq!(15., e);
58409 }
58410
58411 #[simd_test(enable = "avx512f")]
58412 fn test_mm512_mask_reduce_max_ps() {
58413 let a = _mm512_set_ps(
58414 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58415 );
58416 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
58417 assert_eq!(7., e);
58418 }
58419
58420 #[simd_test(enable = "avx512f")]
58421 const fn test_mm512_reduce_min_epi32() {
58422 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58423 let e: i32 = _mm512_reduce_min_epi32(a);
58424 assert_eq!(0, e);
58425 }
58426
58427 #[simd_test(enable = "avx512f")]
58428 const fn test_mm512_mask_reduce_min_epi32() {
58429 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58430 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
58431 assert_eq!(0, e);
58432 }
58433
58434 #[simd_test(enable = "avx512f")]
58435 const fn test_mm512_reduce_min_epu32() {
58436 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58437 let e: u32 = _mm512_reduce_min_epu32(a);
58438 assert_eq!(0, e);
58439 }
58440
58441 #[simd_test(enable = "avx512f")]
58442 const fn test_mm512_mask_reduce_min_epu32() {
58443 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58444 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
58445 assert_eq!(0, e);
58446 }
58447
58448 #[simd_test(enable = "avx512f")]
58449 fn test_mm512_reduce_min_ps() {
58450 let a = _mm512_set_ps(
58451 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58452 );
58453 let e: f32 = _mm512_reduce_min_ps(a);
58454 assert_eq!(0., e);
58455 }
58456
58457 #[simd_test(enable = "avx512f")]
58458 fn test_mm512_mask_reduce_min_ps() {
58459 let a = _mm512_set_ps(
58460 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58461 );
58462 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
58463 assert_eq!(0., e);
58464 }
58465
58466 #[simd_test(enable = "avx512f")]
58467 const fn test_mm512_reduce_and_epi32() {
58468 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58469 let e: i32 = _mm512_reduce_and_epi32(a);
58470 assert_eq!(0, e);
58471 }
58472
58473 #[simd_test(enable = "avx512f")]
58474 const fn test_mm512_mask_reduce_and_epi32() {
58475 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58476 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
58477 assert_eq!(1, e);
58478 }
58479
58480 #[simd_test(enable = "avx512f")]
58481 const fn test_mm512_reduce_or_epi32() {
58482 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58483 let e: i32 = _mm512_reduce_or_epi32(a);
58484 assert_eq!(3, e);
58485 }
58486
58487 #[simd_test(enable = "avx512f")]
58488 const fn test_mm512_mask_reduce_or_epi32() {
58489 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
58490 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
58491 assert_eq!(1, e);
58492 }
58493
58494 #[simd_test(enable = "avx512f")]
58495 fn test_mm512_mask_compress_epi32() {
58496 let src = _mm512_set1_epi32(200);
58497 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58498 let r = _mm512_mask_compress_epi32(src, 0, a);
58499 assert_eq_m512i(r, src);
58500 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
58501 let e = _mm512_set_epi32(
58502 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
58503 );
58504 assert_eq_m512i(r, e);
58505 }
58506
58507 #[simd_test(enable = "avx512f")]
58508 fn test_mm512_maskz_compress_epi32() {
58509 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58510 let r = _mm512_maskz_compress_epi32(0, a);
58511 assert_eq_m512i(r, _mm512_setzero_si512());
58512 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
58513 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
58514 assert_eq_m512i(r, e);
58515 }
58516
58517 #[simd_test(enable = "avx512f,avx512vl")]
58518 fn test_mm256_mask_compress_epi32() {
58519 let src = _mm256_set1_epi32(200);
58520 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58521 let r = _mm256_mask_compress_epi32(src, 0, a);
58522 assert_eq_m256i(r, src);
58523 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
58524 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
58525 assert_eq_m256i(r, e);
58526 }
58527
58528 #[simd_test(enable = "avx512f,avx512vl")]
58529 fn test_mm256_maskz_compress_epi32() {
58530 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58531 let r = _mm256_maskz_compress_epi32(0, a);
58532 assert_eq_m256i(r, _mm256_setzero_si256());
58533 let r = _mm256_maskz_compress_epi32(0b01010101, a);
58534 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
58535 assert_eq_m256i(r, e);
58536 }
58537
58538 #[simd_test(enable = "avx512f,avx512vl")]
58539 fn test_mm_mask_compress_epi32() {
58540 let src = _mm_set1_epi32(200);
58541 let a = _mm_set_epi32(0, 1, 2, 3);
58542 let r = _mm_mask_compress_epi32(src, 0, a);
58543 assert_eq_m128i(r, src);
58544 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
58545 let e = _mm_set_epi32(200, 200, 1, 3);
58546 assert_eq_m128i(r, e);
58547 }
58548
58549 #[simd_test(enable = "avx512f,avx512vl")]
58550 fn test_mm_maskz_compress_epi32() {
58551 let a = _mm_set_epi32(0, 1, 2, 3);
58552 let r = _mm_maskz_compress_epi32(0, a);
58553 assert_eq_m128i(r, _mm_setzero_si128());
58554 let r = _mm_maskz_compress_epi32(0b00000101, a);
58555 let e = _mm_set_epi32(0, 0, 1, 3);
58556 assert_eq_m128i(r, e);
58557 }
58558
58559 #[simd_test(enable = "avx512f")]
58560 fn test_mm512_mask_compress_ps() {
58561 let src = _mm512_set1_ps(200.);
58562 let a = _mm512_set_ps(
58563 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58564 );
58565 let r = _mm512_mask_compress_ps(src, 0, a);
58566 assert_eq_m512(r, src);
58567 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
58568 let e = _mm512_set_ps(
58569 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
58570 );
58571 assert_eq_m512(r, e);
58572 }
58573
58574 #[simd_test(enable = "avx512f")]
58575 fn test_mm512_maskz_compress_ps() {
58576 let a = _mm512_set_ps(
58577 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58578 );
58579 let r = _mm512_maskz_compress_ps(0, a);
58580 assert_eq_m512(r, _mm512_setzero_ps());
58581 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
58582 let e = _mm512_set_ps(
58583 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
58584 );
58585 assert_eq_m512(r, e);
58586 }
58587
58588 #[simd_test(enable = "avx512f,avx512vl")]
58589 fn test_mm256_mask_compress_ps() {
58590 let src = _mm256_set1_ps(200.);
58591 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58592 let r = _mm256_mask_compress_ps(src, 0, a);
58593 assert_eq_m256(r, src);
58594 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
58595 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
58596 assert_eq_m256(r, e);
58597 }
58598
58599 #[simd_test(enable = "avx512f,avx512vl")]
58600 fn test_mm256_maskz_compress_ps() {
58601 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58602 let r = _mm256_maskz_compress_ps(0, a);
58603 assert_eq_m256(r, _mm256_setzero_ps());
58604 let r = _mm256_maskz_compress_ps(0b01010101, a);
58605 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
58606 assert_eq_m256(r, e);
58607 }
58608
58609 #[simd_test(enable = "avx512f,avx512vl")]
58610 fn test_mm_mask_compress_ps() {
58611 let src = _mm_set1_ps(200.);
58612 let a = _mm_set_ps(0., 1., 2., 3.);
58613 let r = _mm_mask_compress_ps(src, 0, a);
58614 assert_eq_m128(r, src);
58615 let r = _mm_mask_compress_ps(src, 0b00000101, a);
58616 let e = _mm_set_ps(200., 200., 1., 3.);
58617 assert_eq_m128(r, e);
58618 }
58619
58620 #[simd_test(enable = "avx512f,avx512vl")]
58621 fn test_mm_maskz_compress_ps() {
58622 let a = _mm_set_ps(0., 1., 2., 3.);
58623 let r = _mm_maskz_compress_ps(0, a);
58624 assert_eq_m128(r, _mm_setzero_ps());
58625 let r = _mm_maskz_compress_ps(0b00000101, a);
58626 let e = _mm_set_ps(0., 0., 1., 3.);
58627 assert_eq_m128(r, e);
58628 }
58629
#[simd_test(enable = "avx512f")]
fn test_mm512_mask_compressstoreu_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let mut r = [0_i32; 16];
    // With a zero mask nothing is stored; the destination stays untouched.
    unsafe {
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
    }
    assert_eq!(&r, &[0_i32; 16]);
    // Mask bits 1,3,6,7,12..15 select lanes with values 2,4,7,8,13..16;
    // compress-store writes them contiguously from the start of `r` and
    // leaves the tail unwritten (still zero).
    unsafe {
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
    }
    assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
}
58643
58644 #[simd_test(enable = "avx512f,avx512vl")]
58645 fn test_mm256_mask_compressstoreu_epi32() {
58646 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
58647 let mut r = [0_i32; 8];
58648 unsafe {
58649 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
58650 }
58651 assert_eq!(&r, &[0_i32; 8]);
58652 unsafe {
58653 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
58654 }
58655 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
58656 }
58657
58658 #[simd_test(enable = "avx512f,avx512vl")]
58659 fn test_mm_mask_compressstoreu_epi32() {
58660 let a = _mm_setr_epi32(1, 2, 3, 4);
58661 let mut r = [0_i32; 4];
58662 unsafe {
58663 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
58664 }
58665 assert_eq!(&r, &[0_i32; 4]);
58666 unsafe {
58667 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
58668 }
58669 assert_eq!(&r, &[1, 2, 4, 0]);
58670 }
58671
58672 #[simd_test(enable = "avx512f")]
58673 fn test_mm512_mask_compressstoreu_epi64() {
58674 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
58675 let mut r = [0_i64; 8];
58676 unsafe {
58677 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58678 }
58679 assert_eq!(&r, &[0_i64; 8]);
58680 unsafe {
58681 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
58682 }
58683 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
58684 }
58685
58686 #[simd_test(enable = "avx512f,avx512vl")]
58687 fn test_mm256_mask_compressstoreu_epi64() {
58688 let a = _mm256_setr_epi64x(1, 2, 3, 4);
58689 let mut r = [0_i64; 4];
58690 unsafe {
58691 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58692 }
58693 assert_eq!(&r, &[0_i64; 4]);
58694 unsafe {
58695 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
58696 }
58697 assert_eq!(&r, &[1, 2, 4, 0]);
58698 }
58699
58700 #[simd_test(enable = "avx512f,avx512vl")]
58701 fn test_mm_mask_compressstoreu_epi64() {
58702 let a = _mm_setr_epi64x(1, 2);
58703 let mut r = [0_i64; 2];
58704 unsafe {
58705 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
58706 }
58707 assert_eq!(&r, &[0_i64; 2]);
58708 unsafe {
58709 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
58710 }
58711 assert_eq!(&r, &[2, 0]);
58712 }
58713
58714 #[simd_test(enable = "avx512f")]
58715 fn test_mm512_mask_compressstoreu_ps() {
58716 let a = _mm512_setr_ps(
58717 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
58718 13_f32, 14_f32, 15_f32, 16_f32,
58719 );
58720 let mut r = [0_f32; 16];
58721 unsafe {
58722 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58723 }
58724 assert_eq!(&r, &[0_f32; 16]);
58725 unsafe {
58726 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
58727 }
58728 assert_eq!(
58729 &r,
58730 &[
58731 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
58732 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
58733 ]
58734 );
58735 }
58736
58737 #[simd_test(enable = "avx512f,avx512vl")]
58738 fn test_mm256_mask_compressstoreu_ps() {
58739 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
58740 let mut r = [0_f32; 8];
58741 unsafe {
58742 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58743 }
58744 assert_eq!(&r, &[0_f32; 8]);
58745 unsafe {
58746 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
58747 }
58748 assert_eq!(
58749 &r,
58750 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
58751 );
58752 }
58753
58754 #[simd_test(enable = "avx512f,avx512vl")]
58755 fn test_mm_mask_compressstoreu_ps() {
58756 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
58757 let mut r = [0.; 4];
58758 unsafe {
58759 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
58760 }
58761 assert_eq!(&r, &[0.; 4]);
58762 unsafe {
58763 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
58764 }
58765 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
58766 }
58767
58768 #[simd_test(enable = "avx512f")]
58769 fn test_mm512_mask_compressstoreu_pd() {
58770 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
58771 let mut r = [0.; 8];
58772 unsafe {
58773 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58774 }
58775 assert_eq!(&r, &[0.; 8]);
58776 unsafe {
58777 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
58778 }
58779 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
58780 }
58781
58782 #[simd_test(enable = "avx512f,avx512vl")]
58783 fn test_mm256_mask_compressstoreu_pd() {
58784 let a = _mm256_setr_pd(1., 2., 3., 4.);
58785 let mut r = [0.; 4];
58786 unsafe {
58787 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58788 }
58789 assert_eq!(&r, &[0.; 4]);
58790 unsafe {
58791 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
58792 }
58793 assert_eq!(&r, &[1., 2., 4., 0.]);
58794 }
58795
58796 #[simd_test(enable = "avx512f,avx512vl")]
58797 fn test_mm_mask_compressstoreu_pd() {
58798 let a = _mm_setr_pd(1., 2.);
58799 let mut r = [0.; 2];
58800 unsafe {
58801 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
58802 }
58803 assert_eq!(&r, &[0.; 2]);
58804 unsafe {
58805 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
58806 }
58807 assert_eq!(&r, &[2., 0.]);
58808 }
58809
58810 #[simd_test(enable = "avx512f")]
58811 fn test_mm512_mask_expand_epi32() {
58812 let src = _mm512_set1_epi32(200);
58813 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58814 let r = _mm512_mask_expand_epi32(src, 0, a);
58815 assert_eq_m512i(r, src);
58816 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
58817 let e = _mm512_set_epi32(
58818 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
58819 );
58820 assert_eq_m512i(r, e);
58821 }
58822
58823 #[simd_test(enable = "avx512f")]
58824 fn test_mm512_maskz_expand_epi32() {
58825 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
58826 let r = _mm512_maskz_expand_epi32(0, a);
58827 assert_eq_m512i(r, _mm512_setzero_si512());
58828 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
58829 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
58830 assert_eq_m512i(r, e);
58831 }
58832
58833 #[simd_test(enable = "avx512f,avx512vl")]
58834 fn test_mm256_mask_expand_epi32() {
58835 let src = _mm256_set1_epi32(200);
58836 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58837 let r = _mm256_mask_expand_epi32(src, 0, a);
58838 assert_eq_m256i(r, src);
58839 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
58840 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
58841 assert_eq_m256i(r, e);
58842 }
58843
58844 #[simd_test(enable = "avx512f,avx512vl")]
58845 fn test_mm256_maskz_expand_epi32() {
58846 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
58847 let r = _mm256_maskz_expand_epi32(0, a);
58848 assert_eq_m256i(r, _mm256_setzero_si256());
58849 let r = _mm256_maskz_expand_epi32(0b01010101, a);
58850 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
58851 assert_eq_m256i(r, e);
58852 }
58853
58854 #[simd_test(enable = "avx512f,avx512vl")]
58855 fn test_mm_mask_expand_epi32() {
58856 let src = _mm_set1_epi32(200);
58857 let a = _mm_set_epi32(0, 1, 2, 3);
58858 let r = _mm_mask_expand_epi32(src, 0, a);
58859 assert_eq_m128i(r, src);
58860 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
58861 let e = _mm_set_epi32(200, 2, 200, 3);
58862 assert_eq_m128i(r, e);
58863 }
58864
58865 #[simd_test(enable = "avx512f,avx512vl")]
58866 fn test_mm_maskz_expand_epi32() {
58867 let a = _mm_set_epi32(0, 1, 2, 3);
58868 let r = _mm_maskz_expand_epi32(0, a);
58869 assert_eq_m128i(r, _mm_setzero_si128());
58870 let r = _mm_maskz_expand_epi32(0b00000101, a);
58871 let e = _mm_set_epi32(0, 2, 0, 3);
58872 assert_eq_m128i(r, e);
58873 }
58874
58875 #[simd_test(enable = "avx512f")]
58876 fn test_mm512_mask_expand_ps() {
58877 let src = _mm512_set1_ps(200.);
58878 let a = _mm512_set_ps(
58879 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58880 );
58881 let r = _mm512_mask_expand_ps(src, 0, a);
58882 assert_eq_m512(r, src);
58883 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
58884 let e = _mm512_set_ps(
58885 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
58886 );
58887 assert_eq_m512(r, e);
58888 }
58889
58890 #[simd_test(enable = "avx512f")]
58891 fn test_mm512_maskz_expand_ps() {
58892 let a = _mm512_set_ps(
58893 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
58894 );
58895 let r = _mm512_maskz_expand_ps(0, a);
58896 assert_eq_m512(r, _mm512_setzero_ps());
58897 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
58898 let e = _mm512_set_ps(
58899 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
58900 );
58901 assert_eq_m512(r, e);
58902 }
58903
58904 #[simd_test(enable = "avx512f,avx512vl")]
58905 fn test_mm256_mask_expand_ps() {
58906 let src = _mm256_set1_ps(200.);
58907 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
58908 let r = _mm256_mask_expand_ps(src, 0, a);
58909 assert_eq_m256(r, src);
58910 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
58911 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
58912 assert_eq_m256(r, e);
58913 }
58914
    // `_mm256_maskz_expand_ps`: like the mask variant, but unselected lanes
    // are zeroed instead of taken from a source vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_expand_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_expand_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
        assert_eq_m256(r, e);
    }
58924
    // `_mm_mask_expand_ps`: mask 0b0101 expands the two lowest source
    // elements (3., 2.) into lanes 0 and 2; lanes 1 and 3 come from `src`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_expand_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_expand_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 2., 200., 3.);
        assert_eq_m128(r, e);
    }
58935
    // `_mm_maskz_expand_ps`: zero mask zeroes all lanes; mask 0b0101 expands
    // the two lowest source elements into lanes 0 and 2, zeroing the rest.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_expand_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_expand_ps(0b00000101, a);
        let e = _mm_set_ps(0., 2., 0., 3.);
        assert_eq_m128(r, e);
    }
58945
    // `_mm512_loadu_epi32`: unaligned load of 16 i32s; expected vector uses
    // `setr` so it matches the slice's memory order element-for-element.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = unsafe { _mm512_loadu_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
58954
    // `_mm256_loadu_epi32`: unaligned load of 8 i32s from a plain slice.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
        let p = a.as_ptr();
        let r = unsafe { _mm256_loadu_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
58963
    // `_mm_loadu_epi32`: unaligned load of 4 i32s from a plain slice.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_loadu_epi32() {
        let a = &[4, 3, 2, 5];
        let p = a.as_ptr();
        let r = unsafe { _mm_loadu_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
58972
    // `_mm512_mask_cvtepi32_storeu_epi16`: truncating 32->16-bit store. The
    // full mask writes all 16 i16s (32 bytes), completely overwriting the
    // undefined destination before it is read.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
        }
        let e = _mm256_set1_epi16(9);
        assert_eq_m256i(r, e);
    }
58983
    // `_mm256_mask_cvtepi32_storeu_epi16`: full mask writes 8 i16s (16 bytes),
    // completely overwriting the undefined 128-bit destination.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(9);
        assert_eq_m128i(r, e);
    }
58994
    // `_mm_mask_cvtepi32_storeu_epi16`: only 4 i16s (8 bytes) are written, so
    // the destination is zero-initialized and its upper half must stay zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi16() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59005
    // `_mm512_mask_cvtsepi32_storeu_epi16`: signed-saturating 32->16-bit
    // store; i32::MAX saturates to i16::MAX in every lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(i16::MAX);
        assert_eq_m256i(r, e);
    }
59020
    // `_mm256_mask_cvtsepi32_storeu_epi16`: signed saturation of i32::MAX to
    // i16::MAX; full mask overwrites all 16 destination bytes.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(i16::MAX);
        assert_eq_m128i(r, e);
    }
59031
    // `_mm_mask_cvtsepi32_storeu_epi16`: writes only the low 8 bytes, so the
    // zero-initialized upper half of the destination must remain zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
59042
    // `_mm512_mask_cvtusepi32_storeu_epi16`: unsigned-saturating store;
    // i32::MAX saturates to u16::MAX (stored bit pattern is -1 as i16).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi16() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi16(
                &mut r as *mut _ as *mut i16,
                0b11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi16(u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
59057
    // `_mm256_mask_cvtusepi32_storeu_epi16`: unsigned saturation of i32::MAX
    // to u16::MAX; full mask overwrites the whole 128-bit destination.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi16() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set1_epi16(u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
59068
    // `_mm_mask_cvtusepi32_storeu_epi16`: 4 saturated u16s land in the low
    // 8 bytes; the zero-initialized upper half must remain untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi16() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
        }
        let e = _mm_set_epi16(
            0,
            0,
            0,
            0,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
            u16::MAX as i16,
        );
        assert_eq_m128i(r, e);
    }
59088
    // `_mm512_mask_cvtepi32_storeu_epi8`: truncating 32->8-bit store; the full
    // mask writes all 16 bytes, overwriting the undefined destination.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(9);
        assert_eq_m128i(r, e);
    }
59099
    // `_mm256_mask_cvtepi32_storeu_epi8`: only 8 bytes written, so the
    // destination is zero-initialized and its upper half is asserted zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59110
    // `_mm_mask_cvtepi32_storeu_epi8`: only 4 bytes written into the low end
    // of the zeroed destination; the remaining 12 bytes must stay zero.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_storeu_epi8() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }
59121
    // `_mm512_mask_cvtsepi32_storeu_epi8`: signed-saturating 32->8-bit store;
    // i32::MAX saturates to i8::MAX in all 16 bytes.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
59132
    // `_mm256_mask_cvtsepi32_storeu_epi8`: 8 saturated bytes in the low half,
    // zero-initialized upper half untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59149
    // `_mm_mask_cvtsepi32_storeu_epi8`: 4 saturated bytes in the low quarter,
    // the rest of the zeroed destination untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
59166
    // `_mm512_mask_cvtusepi32_storeu_epi8`: unsigned-saturating store;
    // i32::MAX saturates to u8::MAX (byte pattern -1 as i8).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
59177
    // `_mm256_mask_cvtusepi32_storeu_epi8`: 8 unsigned-saturated bytes in the
    // low half; zero-initialized upper half untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59194
    // `_mm_mask_cvtusepi32_storeu_epi8`: 4 unsigned-saturated bytes in the low
    // quarter; the rest of the zeroed destination untouched.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
59211
    // `_mm512_storeu_epi32`: unaligned store round-trips a full 512-bit
    // vector; the store fully overwrites the undefined destination.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59221
    // `_mm256_storeu_epi32`: unaligned store round-trips a 256-bit vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_storeu_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59231
    // `_mm_storeu_epi32`: unaligned store round-trips a 128-bit vector.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_storeu_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59241
    // `_mm512_loadu_si512`: whole-register unaligned load through a cast
    // pointer; expected vector mirrors the slice in memory order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr().cast();
        let r = unsafe { _mm512_loadu_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59250
    // `_mm512_storeu_si512`: whole-register unaligned store round-trip.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59260
    // `_mm512_load_si512`: aligned load — the wrapper struct provides the
    // required 64-byte alignment for the source data.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr().cast();
        let r = unsafe { _mm512_load_si512(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59275
    // `_mm512_store_si512`: aligned store round-trip (an __m512i local is
    // naturally 64-byte aligned).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_si512(&mut r as *mut _, a);
        }
        assert_eq_m512i(r, a);
    }
59285
    // `_mm512_load_epi32`: aligned i32 load from a 64-byte-aligned buffer.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_epi32(black_box(p)) };
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }
59300
    // `_mm256_load_epi32`: aligned i32 load (over-aligned to 64 bytes).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm256_load_epi32(black_box(p)) };
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }
59315
    // `_mm_load_epi32`: aligned i32 load (over-aligned to 64 bytes).
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 4],
        }
        let a = Align { data: [4, 3, 2, 5] };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm_load_epi32(black_box(p)) };
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }
59328
    // `_mm512_store_epi32`: aligned store round-trip into an __m512i local.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m512i(r, a);
    }
59338
    // `_mm256_store_epi32`: aligned store round-trip into an __m256i local.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_store_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m256i(r, a);
    }
59348
    // `_mm_store_epi32`: aligned store round-trip into an __m128i local.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_store_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
        }
        assert_eq_m128i(r, a);
    }
59358
    // `_mm512_load_ps`: aligned f32 load from a 64-byte-aligned buffer;
    // expected vector uses `setr` to match memory order.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = unsafe { _mm512_load_ps(black_box(p)) };
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }
59377
    // `_mm512_store_ps`: aligned f32 store round-trip into an __m512 local.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        unsafe {
            _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        }
        assert_eq_m512(r, a);
    }
59387
    // `_mm512_mask_set1_epi32`: zero mask keeps `src`; full mask broadcasts
    // `a` into every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59398
    // `_mm512_maskz_set1_epi32`: zero mask zeroes everything; full mask
    // broadcasts `a` into every lane.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
59408
    // `_mm256_mask_set1_epi32`: zero mask keeps `src`; full 8-bit mask
    // broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_set1_epi32() {
        let src = _mm256_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm256_mask_set1_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59419
    // `_mm256_maskz_set1_epi32`: zero mask zeroes; full mask broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm256_maskz_set1_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }
59429
    // `_mm_mask_set1_epi32`: zero mask keeps `src`; mask 0b1111 (all four
    // lanes of a 128-bit vector) broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_set1_epi32() {
        let src = _mm_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm_mask_set1_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59440
    // `_mm_maskz_set1_epi32`: zero mask zeroes; mask 0b1111 broadcasts `a`.
    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm_maskz_set1_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi32(0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }
59450
    // `_mm_mask_move_ss`: only mask bit 0 matters. Upper three lanes always
    // come from `a`; the lowest lane is `src[0]` (110.) when the bit is clear
    // and `b[0]` (40.) when it is set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }
59463
    // `_mm_maskz_move_ss`: lowest lane is zeroed when mask bit 0 is clear,
    // and `b[0]` (40.) when set; upper lanes always come from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }
59475
    // `_mm_mask_move_sd`: upper lane always from `a`; lowest lane is `src[0]`
    // (11.) with the mask bit clear and `b[0]` (4.) with it set.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }
59488
    // `_mm_maskz_move_sd`: lowest lane zeroed when masked off, `b[0]` (4.)
    // when selected; upper lane always from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }
59500
    // `_mm_mask_add_ss`: lowest lane is `src[0]` (110.) when masked off and
    // a[0] + b[0] = 20. + 40. = 60. when selected; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }
59513
59514 #[simd_test(enable = "avx512f")]
59515 const fn test_mm_maskz_add_ss() {
59516 let a = _mm_set_ps(1., 2., 10., 20.);
59517 let b = _mm_set_ps(3., 4., 30., 40.);
59518 let r = _mm_maskz_add_ss(0, a, b);
59519 let e = _mm_set_ps(1., 2., 10., 0.);
59520 assert_eq_m128(r, e);
59521 let r = _mm_maskz_add_ss(0b11111111, a, b);
59522 let e = _mm_set_ps(1., 2., 10., 60.);
59523 assert_eq_m128(r, e);
59524 }
59525
59526 #[simd_test(enable = "avx512f")]
59527 const fn test_mm_mask_add_sd() {
59528 let src = _mm_set_pd(10., 11.);
59529 let a = _mm_set_pd(1., 2.);
59530 let b = _mm_set_pd(3., 4.);
59531 let r = _mm_mask_add_sd(src, 0, a, b);
59532 let e = _mm_set_pd(1., 11.);
59533 assert_eq_m128d(r, e);
59534 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
59535 let e = _mm_set_pd(1., 6.);
59536 assert_eq_m128d(r, e);
59537 }
59538
    // `_mm_maskz_add_sd`: lowest lane zeroed when masked off, a[0] + b[0] =
    // 2. + 4. = 6. when selected; upper lane always from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }
59550
    // `_mm_mask_sub_ss`: lowest lane is `src[0]` (110.) when masked off and
    // a[0] - b[0] = 20. - 40. = -20. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59563
    // `_mm_maskz_sub_ss`: lowest lane zeroed when masked off, 20. - 40. = -20.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }
59575
    // `_mm_mask_sub_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. - 4. = -2. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59588
    // `_mm_maskz_sub_sd`: lowest lane zeroed when masked off, 2. - 4. = -2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }
59600
    // `_mm_mask_mul_ss`: lowest lane is `src[0]` (110.) when masked off and
    // 20. * 40. = 800. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59613
    // `_mm_maskz_mul_ss`: lowest lane zeroed when masked off, 20. * 40. = 800.
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }
59625
    // `_mm_mask_mul_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. * 4. = 8. when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
59638
59639 #[simd_test(enable = "avx512f")]
59640 const fn test_mm_maskz_mul_sd() {
59641 let a = _mm_set_pd(1., 2.);
59642 let b = _mm_set_pd(3., 4.);
59643 let r = _mm_maskz_mul_sd(0, a, b);
59644 let e = _mm_set_pd(1., 0.);
59645 assert_eq_m128d(r, e);
59646 let r = _mm_maskz_mul_sd(0b11111111, a, b);
59647 let e = _mm_set_pd(1., 8.);
59648 assert_eq_m128d(r, e);
59649 }
59650
    // `_mm_mask_div_ss`: lowest lane is `src[0]` (110.) when masked off and
    // 20. / 40. = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59663
    // `_mm_maskz_div_ss`: lowest lane zeroed when masked off, 20. / 40. = 0.5
    // when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59675
    // `_mm_mask_div_sd`: lowest lane is `src[0]` (11.) when masked off and
    // 2. / 4. = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59688
59689 #[simd_test(enable = "avx512f")]
59690 const fn test_mm_maskz_div_sd() {
59691 let a = _mm_set_pd(1., 2.);
59692 let b = _mm_set_pd(3., 4.);
59693 let r = _mm_maskz_div_sd(0, a, b);
59694 let e = _mm_set_pd(1., 0.);
59695 assert_eq_m128d(r, e);
59696 let r = _mm_maskz_div_sd(0b11111111, a, b);
59697 let e = _mm_set_pd(1., 0.5);
59698 assert_eq_m128d(r, e);
59699 }
59700
    // `_mm_mask_max_ss`: lowest lane is `a[0]` (3., the src operand here)
    // when masked off and max(3., 7.) = 7. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
59712
    // `_mm_maskz_max_ss`: lowest lane zeroed when masked off, max(3., 7.) = 7.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
59724
    // `_mm_mask_max_sd`: lowest lane unchanged (src = a) when masked off,
    // max(1., 3.) = 3. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
59736
    // `_mm_maskz_max_sd`: lowest lane zeroed when masked off, max(1., 3.) = 3.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
59748
    // `_mm_mask_min_ss`: both cases yield 3. in the lowest lane — masked off
    // it comes from src (= a), selected it is min(3., 7.) = 3.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
59760
    // `_mm_maskz_min_ss`: lowest lane zeroed when masked off, min(3., 7.) = 3.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
59772
    // `_mm_mask_min_sd`: both cases yield 1. in the lowest lane — masked off
    // it comes from src (= a), selected it is min(1., 3.) = 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
59784
    // `_mm_maskz_min_sd`: lowest lane zeroed when masked off, min(1., 3.) = 1.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
59796
    // `_mm_mask_sqrt_ss`: lowest lane is `src[0]` (110.) when masked off and
    // sqrt(b[0]) = sqrt(4.) = 2. when selected; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
59809
    // `_mm_maskz_sqrt_ss`: lowest lane zeroed when masked off, sqrt(4.) = 2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
59821
    // `_mm_mask_sqrt_sd`: lowest lane is `src[0]` (11.) when masked off and
    // sqrt(b[0]) = sqrt(4.) = 2. when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
59834
    // `_mm_maskz_sqrt_sd`: lowest lane zeroed when masked off, sqrt(4.) = 2.
    // when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
59846
    // `_mm_rsqrt14_ss`: lowest lane is the approximate reciprocal square root
    // of b[0] (1/sqrt(4.) = 0.5); upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rsqrt14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59855
    // `_mm_mask_rsqrt14_ss`: lowest lane is `src[0]` (110.) when masked off
    // and 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59868
    // `_mm_maskz_rsqrt14_ss`: lowest lane zeroed when masked off,
    // 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rsqrt14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }
59880
59881 #[simd_test(enable = "avx512f")]
59882 fn test_mm_rsqrt14_sd() {
59883 let a = _mm_set_pd(1., 2.);
59884 let b = _mm_set_pd(3., 4.);
59885 let r = _mm_rsqrt14_sd(a, b);
59886 let e = _mm_set_pd(1., 0.5);
59887 assert_eq_m128d(r, e);
59888 }
59889
    // `_mm_mask_rsqrt14_sd`: lowest lane is `src[0]` (11.) when masked off
    // and 1/sqrt(4.) = 0.5 when selected.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
59902
59903 #[simd_test(enable = "avx512f")]
59904 fn test_mm_maskz_rsqrt14_sd() {
59905 let a = _mm_set_pd(1., 2.);
59906 let b = _mm_set_pd(3., 4.);
59907 let r = _mm_maskz_rsqrt14_sd(0, a, b);
59908 let e = _mm_set_pd(1., 0.);
59909 assert_eq_m128d(r, e);
59910 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
59911 let e = _mm_set_pd(1., 0.5);
59912 assert_eq_m128d(r, e);
59913 }
59914
59915 #[simd_test(enable = "avx512f")]
59916 fn test_mm_rcp14_ss() {
59917 let a = _mm_set_ps(1., 2., 10., 20.);
59918 let b = _mm_set_ps(3., 4., 30., 4.);
59919 let r = _mm_rcp14_ss(a, b);
59920 let e = _mm_set_ps(1., 2., 10., 0.25);
59921 assert_eq_m128(r, e);
59922 }
59923
59924 #[simd_test(enable = "avx512f")]
59925 fn test_mm_mask_rcp14_ss() {
59926 let src = _mm_set_ps(10., 11., 100., 110.);
59927 let a = _mm_set_ps(1., 2., 10., 20.);
59928 let b = _mm_set_ps(3., 4., 30., 4.);
59929 let r = _mm_mask_rcp14_ss(src, 0, a, b);
59930 let e = _mm_set_ps(1., 2., 10., 110.);
59931 assert_eq_m128(r, e);
59932 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
59933 let e = _mm_set_ps(1., 2., 10., 0.25);
59934 assert_eq_m128(r, e);
59935 }
59936
59937 #[simd_test(enable = "avx512f")]
59938 fn test_mm_maskz_rcp14_ss() {
59939 let a = _mm_set_ps(1., 2., 10., 20.);
59940 let b = _mm_set_ps(3., 4., 30., 4.);
59941 let r = _mm_maskz_rcp14_ss(0, a, b);
59942 let e = _mm_set_ps(1., 2., 10., 0.);
59943 assert_eq_m128(r, e);
59944 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
59945 let e = _mm_set_ps(1., 2., 10., 0.25);
59946 assert_eq_m128(r, e);
59947 }
59948
59949 #[simd_test(enable = "avx512f")]
59950 fn test_mm_rcp14_sd() {
59951 let a = _mm_set_pd(1., 2.);
59952 let b = _mm_set_pd(3., 4.);
59953 let r = _mm_rcp14_sd(a, b);
59954 let e = _mm_set_pd(1., 0.25);
59955 assert_eq_m128d(r, e);
59956 }
59957
59958 #[simd_test(enable = "avx512f")]
59959 fn test_mm_mask_rcp14_sd() {
59960 let src = _mm_set_pd(10., 11.);
59961 let a = _mm_set_pd(1., 2.);
59962 let b = _mm_set_pd(3., 4.);
59963 let r = _mm_mask_rcp14_sd(src, 0, a, b);
59964 let e = _mm_set_pd(1., 11.);
59965 assert_eq_m128d(r, e);
59966 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
59967 let e = _mm_set_pd(1., 0.25);
59968 assert_eq_m128d(r, e);
59969 }
59970
59971 #[simd_test(enable = "avx512f")]
59972 fn test_mm_maskz_rcp14_sd() {
59973 let a = _mm_set_pd(1., 2.);
59974 let b = _mm_set_pd(3., 4.);
59975 let r = _mm_maskz_rcp14_sd(0, a, b);
59976 let e = _mm_set_pd(1., 0.);
59977 assert_eq_m128d(r, e);
59978 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
59979 let e = _mm_set_pd(1., 0.25);
59980 assert_eq_m128d(r, e);
59981 }
59982
59983 #[simd_test(enable = "avx512f")]
59984 fn test_mm_getexp_ss() {
59985 let a = _mm_set1_ps(2.);
59986 let b = _mm_set1_ps(3.);
59987 let r = _mm_getexp_ss(a, b);
59988 let e = _mm_set_ps(2., 2., 2., 1.);
59989 assert_eq_m128(r, e);
59990 }
59991
59992 #[simd_test(enable = "avx512f")]
59993 fn test_mm_mask_getexp_ss() {
59994 let a = _mm_set1_ps(2.);
59995 let b = _mm_set1_ps(3.);
59996 let r = _mm_mask_getexp_ss(a, 0, a, b);
59997 let e = _mm_set_ps(2., 2., 2., 2.);
59998 assert_eq_m128(r, e);
59999 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
60000 let e = _mm_set_ps(2., 2., 2., 1.);
60001 assert_eq_m128(r, e);
60002 }
60003
60004 #[simd_test(enable = "avx512f")]
60005 fn test_mm_maskz_getexp_ss() {
60006 let a = _mm_set1_ps(2.);
60007 let b = _mm_set1_ps(3.);
60008 let r = _mm_maskz_getexp_ss(0, a, b);
60009 let e = _mm_set_ps(2., 2., 2., 0.);
60010 assert_eq_m128(r, e);
60011 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
60012 let e = _mm_set_ps(2., 2., 2., 1.);
60013 assert_eq_m128(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 fn test_mm_getexp_sd() {
60018 let a = _mm_set1_pd(2.);
60019 let b = _mm_set1_pd(3.);
60020 let r = _mm_getexp_sd(a, b);
60021 let e = _mm_set_pd(2., 1.);
60022 assert_eq_m128d(r, e);
60023 }
60024
60025 #[simd_test(enable = "avx512f")]
60026 fn test_mm_mask_getexp_sd() {
60027 let a = _mm_set1_pd(2.);
60028 let b = _mm_set1_pd(3.);
60029 let r = _mm_mask_getexp_sd(a, 0, a, b);
60030 let e = _mm_set_pd(2., 2.);
60031 assert_eq_m128d(r, e);
60032 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
60033 let e = _mm_set_pd(2., 1.);
60034 assert_eq_m128d(r, e);
60035 }
60036
60037 #[simd_test(enable = "avx512f")]
60038 fn test_mm_maskz_getexp_sd() {
60039 let a = _mm_set1_pd(2.);
60040 let b = _mm_set1_pd(3.);
60041 let r = _mm_maskz_getexp_sd(0, a, b);
60042 let e = _mm_set_pd(2., 0.);
60043 assert_eq_m128d(r, e);
60044 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
60045 let e = _mm_set_pd(2., 1.);
60046 assert_eq_m128d(r, e);
60047 }
60048
60049 #[simd_test(enable = "avx512f")]
60050 fn test_mm_getmant_ss() {
60051 let a = _mm_set1_ps(20.);
60052 let b = _mm_set1_ps(10.);
60053 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60054 let e = _mm_set_ps(20., 20., 20., 1.25);
60055 assert_eq_m128(r, e);
60056 }
60057
60058 #[simd_test(enable = "avx512f")]
60059 fn test_mm_mask_getmant_ss() {
60060 let a = _mm_set1_ps(20.);
60061 let b = _mm_set1_ps(10.);
60062 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60063 let e = _mm_set_ps(20., 20., 20., 20.);
60064 assert_eq_m128(r, e);
60065 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60066 let e = _mm_set_ps(20., 20., 20., 1.25);
60067 assert_eq_m128(r, e);
60068 }
60069
60070 #[simd_test(enable = "avx512f")]
60071 fn test_mm_maskz_getmant_ss() {
60072 let a = _mm_set1_ps(20.);
60073 let b = _mm_set1_ps(10.);
60074 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60075 let e = _mm_set_ps(20., 20., 20., 0.);
60076 assert_eq_m128(r, e);
60077 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60078 let e = _mm_set_ps(20., 20., 20., 1.25);
60079 assert_eq_m128(r, e);
60080 }
60081
60082 #[simd_test(enable = "avx512f")]
60083 fn test_mm_getmant_sd() {
60084 let a = _mm_set1_pd(20.);
60085 let b = _mm_set1_pd(10.);
60086 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
60087 let e = _mm_set_pd(20., 1.25);
60088 assert_eq_m128d(r, e);
60089 }
60090
60091 #[simd_test(enable = "avx512f")]
60092 fn test_mm_mask_getmant_sd() {
60093 let a = _mm_set1_pd(20.);
60094 let b = _mm_set1_pd(10.);
60095 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
60096 let e = _mm_set_pd(20., 20.);
60097 assert_eq_m128d(r, e);
60098 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
60099 let e = _mm_set_pd(20., 1.25);
60100 assert_eq_m128d(r, e);
60101 }
60102
60103 #[simd_test(enable = "avx512f")]
60104 fn test_mm_maskz_getmant_sd() {
60105 let a = _mm_set1_pd(20.);
60106 let b = _mm_set1_pd(10.);
60107 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
60108 let e = _mm_set_pd(20., 0.);
60109 assert_eq_m128d(r, e);
60110 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
60111 let e = _mm_set_pd(20., 1.25);
60112 assert_eq_m128d(r, e);
60113 }
60114
60115 #[simd_test(enable = "avx512f")]
60116 fn test_mm_roundscale_ss() {
60117 let a = _mm_set1_ps(2.2);
60118 let b = _mm_set1_ps(1.1);
60119 let r = _mm_roundscale_ss::<0>(a, b);
60120 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60121 assert_eq_m128(r, e);
60122 }
60123
60124 #[simd_test(enable = "avx512f")]
60125 fn test_mm_mask_roundscale_ss() {
60126 let a = _mm_set1_ps(2.2);
60127 let b = _mm_set1_ps(1.1);
60128 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
60129 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
60130 assert_eq_m128(r, e);
60131 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
60132 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60133 assert_eq_m128(r, e);
60134 }
60135
60136 #[simd_test(enable = "avx512f")]
60137 fn test_mm_maskz_roundscale_ss() {
60138 let a = _mm_set1_ps(2.2);
60139 let b = _mm_set1_ps(1.1);
60140 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
60141 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
60142 assert_eq_m128(r, e);
60143 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
60144 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
60145 assert_eq_m128(r, e);
60146 }
60147
60148 #[simd_test(enable = "avx512f")]
60149 fn test_mm_roundscale_sd() {
60150 let a = _mm_set1_pd(2.2);
60151 let b = _mm_set1_pd(1.1);
60152 let r = _mm_roundscale_sd::<0>(a, b);
60153 let e = _mm_set_pd(2.2, 1.0);
60154 assert_eq_m128d(r, e);
60155 }
60156
60157 #[simd_test(enable = "avx512f")]
60158 fn test_mm_mask_roundscale_sd() {
60159 let a = _mm_set1_pd(2.2);
60160 let b = _mm_set1_pd(1.1);
60161 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
60162 let e = _mm_set_pd(2.2, 2.2);
60163 assert_eq_m128d(r, e);
60164 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
60165 let e = _mm_set_pd(2.2, 1.0);
60166 assert_eq_m128d(r, e);
60167 }
60168
60169 #[simd_test(enable = "avx512f")]
60170 fn test_mm_maskz_roundscale_sd() {
60171 let a = _mm_set1_pd(2.2);
60172 let b = _mm_set1_pd(1.1);
60173 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
60174 let e = _mm_set_pd(2.2, 0.0);
60175 assert_eq_m128d(r, e);
60176 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
60177 let e = _mm_set_pd(2.2, 1.0);
60178 assert_eq_m128d(r, e);
60179 }
60180
60181 #[simd_test(enable = "avx512f")]
60182 fn test_mm_scalef_ss() {
60183 let a = _mm_set1_ps(1.);
60184 let b = _mm_set1_ps(3.);
60185 let r = _mm_scalef_ss(a, b);
60186 let e = _mm_set_ps(1., 1., 1., 8.);
60187 assert_eq_m128(r, e);
60188 }
60189
60190 #[simd_test(enable = "avx512f")]
60191 fn test_mm_mask_scalef_ss() {
60192 let a = _mm_set1_ps(1.);
60193 let b = _mm_set1_ps(3.);
60194 let r = _mm_mask_scalef_ss(a, 0, a, b);
60195 let e = _mm_set_ps(1., 1., 1., 1.);
60196 assert_eq_m128(r, e);
60197 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
60198 let e = _mm_set_ps(1., 1., 1., 8.);
60199 assert_eq_m128(r, e);
60200 }
60201
60202 #[simd_test(enable = "avx512f")]
60203 fn test_mm_maskz_scalef_ss() {
60204 let a = _mm_set1_ps(1.);
60205 let b = _mm_set1_ps(3.);
60206 let r = _mm_maskz_scalef_ss(0, a, b);
60207 let e = _mm_set_ps(1., 1., 1., 0.);
60208 assert_eq_m128(r, e);
60209 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
60210 let e = _mm_set_ps(1., 1., 1., 8.);
60211 assert_eq_m128(r, e);
60212 }
60213
60214 #[simd_test(enable = "avx512f")]
60215 fn test_mm_scalef_sd() {
60216 let a = _mm_set1_pd(1.);
60217 let b = _mm_set1_pd(3.);
60218 let r = _mm_scalef_sd(a, b);
60219 let e = _mm_set_pd(1., 8.);
60220 assert_eq_m128d(r, e);
60221 }
60222
60223 #[simd_test(enable = "avx512f")]
60224 fn test_mm_mask_scalef_sd() {
60225 let a = _mm_set1_pd(1.);
60226 let b = _mm_set1_pd(3.);
60227 let r = _mm_mask_scalef_sd(a, 0, a, b);
60228 let e = _mm_set_pd(1., 1.);
60229 assert_eq_m128d(r, e);
60230 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
60231 let e = _mm_set_pd(1., 8.);
60232 assert_eq_m128d(r, e);
60233 }
60234
60235 #[simd_test(enable = "avx512f")]
60236 fn test_mm_maskz_scalef_sd() {
60237 let a = _mm_set1_pd(1.);
60238 let b = _mm_set1_pd(3.);
60239 let r = _mm_maskz_scalef_sd(0, a, b);
60240 let e = _mm_set_pd(1., 0.);
60241 assert_eq_m128d(r, e);
60242 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
60243 let e = _mm_set_pd(1., 8.);
60244 assert_eq_m128d(r, e);
60245 }
60246
60247 #[simd_test(enable = "avx512f")]
60248 const fn test_mm_mask_fmadd_ss() {
60249 let a = _mm_set1_ps(1.);
60250 let b = _mm_set1_ps(2.);
60251 let c = _mm_set1_ps(3.);
60252 let r = _mm_mask_fmadd_ss(a, 0, b, c);
60253 assert_eq_m128(r, a);
60254 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
60255 let e = _mm_set_ps(1., 1., 1., 5.);
60256 assert_eq_m128(r, e);
60257 }
60258
60259 #[simd_test(enable = "avx512f")]
60260 const fn test_mm_maskz_fmadd_ss() {
60261 let a = _mm_set1_ps(1.);
60262 let b = _mm_set1_ps(2.);
60263 let c = _mm_set1_ps(3.);
60264 let r = _mm_maskz_fmadd_ss(0, a, b, c);
60265 let e = _mm_set_ps(1., 1., 1., 0.);
60266 assert_eq_m128(r, e);
60267 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
60268 let e = _mm_set_ps(1., 1., 1., 5.);
60269 assert_eq_m128(r, e);
60270 }
60271
60272 #[simd_test(enable = "avx512f")]
60273 const fn test_mm_mask3_fmadd_ss() {
60274 let a = _mm_set1_ps(1.);
60275 let b = _mm_set1_ps(2.);
60276 let c = _mm_set1_ps(3.);
60277 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
60278 assert_eq_m128(r, c);
60279 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
60280 let e = _mm_set_ps(3., 3., 3., 5.);
60281 assert_eq_m128(r, e);
60282 }
60283
60284 #[simd_test(enable = "avx512f")]
60285 const fn test_mm_mask_fmadd_sd() {
60286 let a = _mm_set1_pd(1.);
60287 let b = _mm_set1_pd(2.);
60288 let c = _mm_set1_pd(3.);
60289 let r = _mm_mask_fmadd_sd(a, 0, b, c);
60290 assert_eq_m128d(r, a);
60291 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
60292 let e = _mm_set_pd(1., 5.);
60293 assert_eq_m128d(r, e);
60294 }
60295
60296 #[simd_test(enable = "avx512f")]
60297 const fn test_mm_maskz_fmadd_sd() {
60298 let a = _mm_set1_pd(1.);
60299 let b = _mm_set1_pd(2.);
60300 let c = _mm_set1_pd(3.);
60301 let r = _mm_maskz_fmadd_sd(0, a, b, c);
60302 let e = _mm_set_pd(1., 0.);
60303 assert_eq_m128d(r, e);
60304 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
60305 let e = _mm_set_pd(1., 5.);
60306 assert_eq_m128d(r, e);
60307 }
60308
60309 #[simd_test(enable = "avx512f")]
60310 const fn test_mm_mask3_fmadd_sd() {
60311 let a = _mm_set1_pd(1.);
60312 let b = _mm_set1_pd(2.);
60313 let c = _mm_set1_pd(3.);
60314 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
60315 assert_eq_m128d(r, c);
60316 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
60317 let e = _mm_set_pd(3., 5.);
60318 assert_eq_m128d(r, e);
60319 }
60320
60321 #[simd_test(enable = "avx512f")]
60322 const fn test_mm_mask_fmsub_ss() {
60323 let a = _mm_set1_ps(1.);
60324 let b = _mm_set1_ps(2.);
60325 let c = _mm_set1_ps(3.);
60326 let r = _mm_mask_fmsub_ss(a, 0, b, c);
60327 assert_eq_m128(r, a);
60328 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
60329 let e = _mm_set_ps(1., 1., 1., -1.);
60330 assert_eq_m128(r, e);
60331 }
60332
60333 #[simd_test(enable = "avx512f")]
60334 const fn test_mm_maskz_fmsub_ss() {
60335 let a = _mm_set1_ps(1.);
60336 let b = _mm_set1_ps(2.);
60337 let c = _mm_set1_ps(3.);
60338 let r = _mm_maskz_fmsub_ss(0, a, b, c);
60339 let e = _mm_set_ps(1., 1., 1., 0.);
60340 assert_eq_m128(r, e);
60341 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
60342 let e = _mm_set_ps(1., 1., 1., -1.);
60343 assert_eq_m128(r, e);
60344 }
60345
60346 #[simd_test(enable = "avx512f")]
60347 const fn test_mm_mask3_fmsub_ss() {
60348 let a = _mm_set1_ps(1.);
60349 let b = _mm_set1_ps(2.);
60350 let c = _mm_set1_ps(3.);
60351 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
60352 assert_eq_m128(r, c);
60353 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
60354 let e = _mm_set_ps(3., 3., 3., -1.);
60355 assert_eq_m128(r, e);
60356 }
60357
60358 #[simd_test(enable = "avx512f")]
60359 const fn test_mm_mask_fmsub_sd() {
60360 let a = _mm_set1_pd(1.);
60361 let b = _mm_set1_pd(2.);
60362 let c = _mm_set1_pd(3.);
60363 let r = _mm_mask_fmsub_sd(a, 0, b, c);
60364 assert_eq_m128d(r, a);
60365 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
60366 let e = _mm_set_pd(1., -1.);
60367 assert_eq_m128d(r, e);
60368 }
60369
60370 #[simd_test(enable = "avx512f")]
60371 const fn test_mm_maskz_fmsub_sd() {
60372 let a = _mm_set1_pd(1.);
60373 let b = _mm_set1_pd(2.);
60374 let c = _mm_set1_pd(3.);
60375 let r = _mm_maskz_fmsub_sd(0, a, b, c);
60376 let e = _mm_set_pd(1., 0.);
60377 assert_eq_m128d(r, e);
60378 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
60379 let e = _mm_set_pd(1., -1.);
60380 assert_eq_m128d(r, e);
60381 }
60382
60383 #[simd_test(enable = "avx512f")]
60384 const fn test_mm_mask3_fmsub_sd() {
60385 let a = _mm_set1_pd(1.);
60386 let b = _mm_set1_pd(2.);
60387 let c = _mm_set1_pd(3.);
60388 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
60389 assert_eq_m128d(r, c);
60390 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
60391 let e = _mm_set_pd(3., -1.);
60392 assert_eq_m128d(r, e);
60393 }
60394
60395 #[simd_test(enable = "avx512f")]
60396 const fn test_mm_mask_fnmadd_ss() {
60397 let a = _mm_set1_ps(1.);
60398 let b = _mm_set1_ps(2.);
60399 let c = _mm_set1_ps(3.);
60400 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
60401 assert_eq_m128(r, a);
60402 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
60403 let e = _mm_set_ps(1., 1., 1., 1.);
60404 assert_eq_m128(r, e);
60405 }
60406
60407 #[simd_test(enable = "avx512f")]
60408 const fn test_mm_maskz_fnmadd_ss() {
60409 let a = _mm_set1_ps(1.);
60410 let b = _mm_set1_ps(2.);
60411 let c = _mm_set1_ps(3.);
60412 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
60413 let e = _mm_set_ps(1., 1., 1., 0.);
60414 assert_eq_m128(r, e);
60415 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
60416 let e = _mm_set_ps(1., 1., 1., 1.);
60417 assert_eq_m128(r, e);
60418 }
60419
60420 #[simd_test(enable = "avx512f")]
60421 const fn test_mm_mask3_fnmadd_ss() {
60422 let a = _mm_set1_ps(1.);
60423 let b = _mm_set1_ps(2.);
60424 let c = _mm_set1_ps(3.);
60425 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
60426 assert_eq_m128(r, c);
60427 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
60428 let e = _mm_set_ps(3., 3., 3., 1.);
60429 assert_eq_m128(r, e);
60430 }
60431
60432 #[simd_test(enable = "avx512f")]
60433 const fn test_mm_mask_fnmadd_sd() {
60434 let a = _mm_set1_pd(1.);
60435 let b = _mm_set1_pd(2.);
60436 let c = _mm_set1_pd(3.);
60437 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
60438 assert_eq_m128d(r, a);
60439 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
60440 let e = _mm_set_pd(1., 1.);
60441 assert_eq_m128d(r, e);
60442 }
60443
60444 #[simd_test(enable = "avx512f")]
60445 const fn test_mm_maskz_fnmadd_sd() {
60446 let a = _mm_set1_pd(1.);
60447 let b = _mm_set1_pd(2.);
60448 let c = _mm_set1_pd(3.);
60449 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
60450 let e = _mm_set_pd(1., 0.);
60451 assert_eq_m128d(r, e);
60452 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
60453 let e = _mm_set_pd(1., 1.);
60454 assert_eq_m128d(r, e);
60455 }
60456
60457 #[simd_test(enable = "avx512f")]
60458 const fn test_mm_mask3_fnmadd_sd() {
60459 let a = _mm_set1_pd(1.);
60460 let b = _mm_set1_pd(2.);
60461 let c = _mm_set1_pd(3.);
60462 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
60463 assert_eq_m128d(r, c);
60464 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
60465 let e = _mm_set_pd(3., 1.);
60466 assert_eq_m128d(r, e);
60467 }
60468
60469 #[simd_test(enable = "avx512f")]
60470 const fn test_mm_mask_fnmsub_ss() {
60471 let a = _mm_set1_ps(1.);
60472 let b = _mm_set1_ps(2.);
60473 let c = _mm_set1_ps(3.);
60474 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
60475 assert_eq_m128(r, a);
60476 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
60477 let e = _mm_set_ps(1., 1., 1., -5.);
60478 assert_eq_m128(r, e);
60479 }
60480
60481 #[simd_test(enable = "avx512f")]
60482 const fn test_mm_maskz_fnmsub_ss() {
60483 let a = _mm_set1_ps(1.);
60484 let b = _mm_set1_ps(2.);
60485 let c = _mm_set1_ps(3.);
60486 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
60487 let e = _mm_set_ps(1., 1., 1., 0.);
60488 assert_eq_m128(r, e);
60489 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
60490 let e = _mm_set_ps(1., 1., 1., -5.);
60491 assert_eq_m128(r, e);
60492 }
60493
60494 #[simd_test(enable = "avx512f")]
60495 const fn test_mm_mask3_fnmsub_ss() {
60496 let a = _mm_set1_ps(1.);
60497 let b = _mm_set1_ps(2.);
60498 let c = _mm_set1_ps(3.);
60499 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
60500 assert_eq_m128(r, c);
60501 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
60502 let e = _mm_set_ps(3., 3., 3., -5.);
60503 assert_eq_m128(r, e);
60504 }
60505
60506 #[simd_test(enable = "avx512f")]
60507 const fn test_mm_mask_fnmsub_sd() {
60508 let a = _mm_set1_pd(1.);
60509 let b = _mm_set1_pd(2.);
60510 let c = _mm_set1_pd(3.);
60511 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
60512 assert_eq_m128d(r, a);
60513 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
60514 let e = _mm_set_pd(1., -5.);
60515 assert_eq_m128d(r, e);
60516 }
60517
60518 #[simd_test(enable = "avx512f")]
60519 const fn test_mm_maskz_fnmsub_sd() {
60520 let a = _mm_set1_pd(1.);
60521 let b = _mm_set1_pd(2.);
60522 let c = _mm_set1_pd(3.);
60523 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
60524 let e = _mm_set_pd(1., 0.);
60525 assert_eq_m128d(r, e);
60526 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
60527 let e = _mm_set_pd(1., -5.);
60528 assert_eq_m128d(r, e);
60529 }
60530
60531 #[simd_test(enable = "avx512f")]
60532 const fn test_mm_mask3_fnmsub_sd() {
60533 let a = _mm_set1_pd(1.);
60534 let b = _mm_set1_pd(2.);
60535 let c = _mm_set1_pd(3.);
60536 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
60537 assert_eq_m128d(r, c);
60538 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
60539 let e = _mm_set_pd(3., -5.);
60540 assert_eq_m128d(r, e);
60541 }
60542
60543 #[simd_test(enable = "avx512f")]
60544 fn test_mm_add_round_ss() {
60545 let a = _mm_set_ps(1., 2., 10., 20.);
60546 let b = _mm_set_ps(3., 4., 30., 40.);
60547 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60548 let e = _mm_set_ps(1., 2., 10., 60.);
60549 assert_eq_m128(r, e);
60550 }
60551
60552 #[simd_test(enable = "avx512f")]
60553 fn test_mm_mask_add_round_ss() {
60554 let src = _mm_set_ps(10., 11., 100., 110.);
60555 let a = _mm_set_ps(1., 2., 10., 20.);
60556 let b = _mm_set_ps(3., 4., 30., 40.);
60557 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60558 let e = _mm_set_ps(1., 2., 10., 110.);
60559 assert_eq_m128(r, e);
60560 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60561 src, 0b11111111, a, b,
60562 );
60563 let e = _mm_set_ps(1., 2., 10., 60.);
60564 assert_eq_m128(r, e);
60565 }
60566
60567 #[simd_test(enable = "avx512f")]
60568 fn test_mm_maskz_add_round_ss() {
60569 let a = _mm_set_ps(1., 2., 10., 20.);
60570 let b = _mm_set_ps(3., 4., 30., 40.);
60571 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60572 let e = _mm_set_ps(1., 2., 10., 0.);
60573 assert_eq_m128(r, e);
60574 let r =
60575 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60576 let e = _mm_set_ps(1., 2., 10., 60.);
60577 assert_eq_m128(r, e);
60578 }
60579
60580 #[simd_test(enable = "avx512f")]
60581 fn test_mm_add_round_sd() {
60582 let a = _mm_set_pd(1., 2.);
60583 let b = _mm_set_pd(3., 4.);
60584 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60585 let e = _mm_set_pd(1., 6.);
60586 assert_eq_m128d(r, e);
60587 }
60588
60589 #[simd_test(enable = "avx512f")]
60590 fn test_mm_mask_add_round_sd() {
60591 let src = _mm_set_pd(10., 11.);
60592 let a = _mm_set_pd(1., 2.);
60593 let b = _mm_set_pd(3., 4.);
60594 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60595 let e = _mm_set_pd(1., 11.);
60596 assert_eq_m128d(r, e);
60597 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60598 src, 0b11111111, a, b,
60599 );
60600 let e = _mm_set_pd(1., 6.);
60601 assert_eq_m128d(r, e);
60602 }
60603
60604 #[simd_test(enable = "avx512f")]
60605 fn test_mm_maskz_add_round_sd() {
60606 let a = _mm_set_pd(1., 2.);
60607 let b = _mm_set_pd(3., 4.);
60608 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60609 let e = _mm_set_pd(1., 0.);
60610 assert_eq_m128d(r, e);
60611 let r =
60612 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60613 let e = _mm_set_pd(1., 6.);
60614 assert_eq_m128d(r, e);
60615 }
60616
60617 #[simd_test(enable = "avx512f")]
60618 fn test_mm_sub_round_ss() {
60619 let a = _mm_set_ps(1., 2., 10., 20.);
60620 let b = _mm_set_ps(3., 4., 30., 40.);
60621 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60622 let e = _mm_set_ps(1., 2., 10., -20.);
60623 assert_eq_m128(r, e);
60624 }
60625
60626 #[simd_test(enable = "avx512f")]
60627 fn test_mm_mask_sub_round_ss() {
60628 let src = _mm_set_ps(10., 11., 100., 110.);
60629 let a = _mm_set_ps(1., 2., 10., 20.);
60630 let b = _mm_set_ps(3., 4., 30., 40.);
60631 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60632 let e = _mm_set_ps(1., 2., 10., 110.);
60633 assert_eq_m128(r, e);
60634 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60635 src, 0b11111111, a, b,
60636 );
60637 let e = _mm_set_ps(1., 2., 10., -20.);
60638 assert_eq_m128(r, e);
60639 }
60640
60641 #[simd_test(enable = "avx512f")]
60642 fn test_mm_maskz_sub_round_ss() {
60643 let a = _mm_set_ps(1., 2., 10., 20.);
60644 let b = _mm_set_ps(3., 4., 30., 40.);
60645 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60646 let e = _mm_set_ps(1., 2., 10., 0.);
60647 assert_eq_m128(r, e);
60648 let r =
60649 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60650 let e = _mm_set_ps(1., 2., 10., -20.);
60651 assert_eq_m128(r, e);
60652 }
60653
60654 #[simd_test(enable = "avx512f")]
60655 fn test_mm_sub_round_sd() {
60656 let a = _mm_set_pd(1., 2.);
60657 let b = _mm_set_pd(3., 4.);
60658 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60659 let e = _mm_set_pd(1., -2.);
60660 assert_eq_m128d(r, e);
60661 }
60662
60663 #[simd_test(enable = "avx512f")]
60664 fn test_mm_mask_sub_round_sd() {
60665 let src = _mm_set_pd(10., 11.);
60666 let a = _mm_set_pd(1., 2.);
60667 let b = _mm_set_pd(3., 4.);
60668 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60669 let e = _mm_set_pd(1., 11.);
60670 assert_eq_m128d(r, e);
60671 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60672 src, 0b11111111, a, b,
60673 );
60674 let e = _mm_set_pd(1., -2.);
60675 assert_eq_m128d(r, e);
60676 }
60677
60678 #[simd_test(enable = "avx512f")]
60679 fn test_mm_maskz_sub_round_sd() {
60680 let a = _mm_set_pd(1., 2.);
60681 let b = _mm_set_pd(3., 4.);
60682 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60683 let e = _mm_set_pd(1., 0.);
60684 assert_eq_m128d(r, e);
60685 let r =
60686 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60687 let e = _mm_set_pd(1., -2.);
60688 assert_eq_m128d(r, e);
60689 }
60690
60691 #[simd_test(enable = "avx512f")]
60692 fn test_mm_mul_round_ss() {
60693 let a = _mm_set_ps(1., 2., 10., 20.);
60694 let b = _mm_set_ps(3., 4., 30., 40.);
60695 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60696 let e = _mm_set_ps(1., 2., 10., 800.);
60697 assert_eq_m128(r, e);
60698 }
60699
60700 #[simd_test(enable = "avx512f")]
60701 fn test_mm_mask_mul_round_ss() {
60702 let src = _mm_set_ps(10., 11., 100., 110.);
60703 let a = _mm_set_ps(1., 2., 10., 20.);
60704 let b = _mm_set_ps(3., 4., 30., 40.);
60705 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60706 let e = _mm_set_ps(1., 2., 10., 110.);
60707 assert_eq_m128(r, e);
60708 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60709 src, 0b11111111, a, b,
60710 );
60711 let e = _mm_set_ps(1., 2., 10., 800.);
60712 assert_eq_m128(r, e);
60713 }
60714
60715 #[simd_test(enable = "avx512f")]
60716 fn test_mm_maskz_mul_round_ss() {
60717 let a = _mm_set_ps(1., 2., 10., 20.);
60718 let b = _mm_set_ps(3., 4., 30., 40.);
60719 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60720 let e = _mm_set_ps(1., 2., 10., 0.);
60721 assert_eq_m128(r, e);
60722 let r =
60723 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60724 let e = _mm_set_ps(1., 2., 10., 800.);
60725 assert_eq_m128(r, e);
60726 }
60727
60728 #[simd_test(enable = "avx512f")]
60729 fn test_mm_mul_round_sd() {
60730 let a = _mm_set_pd(1., 2.);
60731 let b = _mm_set_pd(3., 4.);
60732 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60733 let e = _mm_set_pd(1., 8.);
60734 assert_eq_m128d(r, e);
60735 }
60736
60737 #[simd_test(enable = "avx512f")]
60738 fn test_mm_mask_mul_round_sd() {
60739 let src = _mm_set_pd(10., 11.);
60740 let a = _mm_set_pd(1., 2.);
60741 let b = _mm_set_pd(3., 4.);
60742 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60743 let e = _mm_set_pd(1., 11.);
60744 assert_eq_m128d(r, e);
60745 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60746 src, 0b11111111, a, b,
60747 );
60748 let e = _mm_set_pd(1., 8.);
60749 assert_eq_m128d(r, e);
60750 }
60751
60752 #[simd_test(enable = "avx512f")]
60753 fn test_mm_maskz_mul_round_sd() {
60754 let a = _mm_set_pd(1., 2.);
60755 let b = _mm_set_pd(3., 4.);
60756 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60757 let e = _mm_set_pd(1., 0.);
60758 assert_eq_m128d(r, e);
60759 let r =
60760 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60761 let e = _mm_set_pd(1., 8.);
60762 assert_eq_m128d(r, e);
60763 }
60764
60765 #[simd_test(enable = "avx512f")]
60766 fn test_mm_div_round_ss() {
60767 let a = _mm_set_ps(1., 2., 10., 20.);
60768 let b = _mm_set_ps(3., 4., 30., 40.);
60769 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60770 let e = _mm_set_ps(1., 2., 10., 0.5);
60771 assert_eq_m128(r, e);
60772 }
60773
60774 #[simd_test(enable = "avx512f")]
60775 fn test_mm_mask_div_round_ss() {
60776 let src = _mm_set_ps(10., 11., 100., 110.);
60777 let a = _mm_set_ps(1., 2., 10., 20.);
60778 let b = _mm_set_ps(3., 4., 30., 40.);
60779 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60780 let e = _mm_set_ps(1., 2., 10., 110.);
60781 assert_eq_m128(r, e);
60782 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60783 src, 0b11111111, a, b,
60784 );
60785 let e = _mm_set_ps(1., 2., 10., 0.5);
60786 assert_eq_m128(r, e);
60787 }
60788
60789 #[simd_test(enable = "avx512f")]
60790 fn test_mm_maskz_div_round_ss() {
60791 let a = _mm_set_ps(1., 2., 10., 20.);
60792 let b = _mm_set_ps(3., 4., 30., 40.);
60793 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60794 let e = _mm_set_ps(1., 2., 10., 0.);
60795 assert_eq_m128(r, e);
60796 let r =
60797 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
60798 let e = _mm_set_ps(1., 2., 10., 0.5);
60799 assert_eq_m128(r, e);
60800 }
60801
60802 #[simd_test(enable = "avx512f")]
60803 fn test_mm_div_round_sd() {
60804 let a = _mm_set_pd(1., 2.);
60805 let b = _mm_set_pd(3., 4.);
60806 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60807 let e = _mm_set_pd(1., 0.5);
60808 assert_eq_m128d(r, e);
60809 }
60810
60811 #[simd_test(enable = "avx512f")]
60812 fn test_mm_mask_div_round_sd() {
60813 let src = _mm_set_pd(10., 11.);
60814 let a = _mm_set_pd(1., 2.);
60815 let b = _mm_set_pd(3., 4.);
60816 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
60817 let e = _mm_set_pd(1., 11.);
60818 assert_eq_m128d(r, e);
60819 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60820 src, 0b11111111, a, b,
60821 );
60822 let e = _mm_set_pd(1., 0.5);
60823 assert_eq_m128d(r, e);
60824 }
60825
    // Zero-masked scalar double divide: low lane is zeroed when mask bit 0 is
    // clear, and is 2./4. == 0.5 when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }
60838
    // Scalar max with SAE: low lane is max(3., 7.) == 7., upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60847
    // Write-masked scalar max: low lane comes from `src` (here `a`, so 3.) when
    // mask bit 0 is clear, and is max(3., 7.) == 7. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60859
    // Zero-masked scalar max: low lane is zeroed when mask bit 0 is clear, and
    // is max(3., 7.) == 7. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }
60871
    // Scalar double max: low lane is max(1., 3.) == 3., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60880
    // Write-masked scalar double max: low lane comes from `src` (here `a`, so
    // 1.) when mask bit 0 is clear, and is max(1., 3.) == 3. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60892
    // Zero-masked scalar double max: low lane is zeroed when mask bit 0 is
    // clear, and is max(1., 3.) == 3. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }
60904
    // Scalar min: low lane is min(3., 7.) == 3., upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60913
    // Write-masked scalar min: with `src == a` both mask cases yield 3. in the
    // low lane (src copy when clear, min(3., 7.) == 3. when set).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60925
    // Zero-masked scalar min: low lane is zeroed when mask bit 0 is clear, and
    // is min(3., 7.) == 3. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }
60937
    // Scalar double min: low lane is min(1., 3.) == 1., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60946
    // Write-masked scalar double min: with `src == a` both mask cases yield 1.
    // in the low lane (src copy when clear, min(1., 3.) == 1. when set).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60958
    // Zero-masked scalar double min: low lane is zeroed when mask bit 0 is
    // clear, and is min(1., 3.) == 1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }
60970
    // Scalar sqrt with explicit rounding: low lane is sqrt(4.) == 2., upper
    // lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
60979
    // Write-masked scalar sqrt: low lane comes from `src` (110.) when mask bit
    // 0 is clear, and is sqrt(4.) == 2. when set; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
60994
    // Zero-masked scalar sqrt: low lane is zeroed when mask bit 0 is clear, and
    // is sqrt(4.) == 2. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }
61007
    // Scalar double sqrt: low lane is sqrt(4.) == 2., high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61016
    // Write-masked scalar double sqrt: low lane comes from `src` (11.) when
    // mask bit 0 is clear, and is sqrt(4.) == 2. when set; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61031
    // Zero-masked scalar double sqrt: low lane is zeroed when mask bit 0 is
    // clear, and is sqrt(4.) == 2. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
61044
    // Scalar getexp: low lane is the exponent of 3. (3 = 1.5 * 2^1, so 1.);
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61053
    // Write-masked scalar getexp: low lane comes from `src` (here `a`, so 2.)
    // when mask bit 0 is clear, and is getexp(3.) == 1. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61065
    // Zero-masked scalar getexp: low lane is zeroed when mask bit 0 is clear,
    // and is getexp(3.) == 1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }
61077
    // Scalar double getexp: low lane is getexp(3.) == 1. (3 = 1.5 * 2^1); high
    // lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61086
    // Write-masked scalar double getexp: low lane comes from `src` (here `a`,
    // so 2.) when mask bit 0 is clear, and is getexp(3.) == 1. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61098
    // Zero-masked scalar double getexp: low lane is zeroed when mask bit 0 is
    // clear, and is getexp(3.) == 1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }
61110
    // Scalar getmant: low lane is the mantissa of 10. normalized to [1, 2)
    // (10 = 1.25 * 2^3, so 1.25); upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r =
            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61122
    // Write-masked scalar getmant: low lane comes from `src` (here `a`, so
    // 20.) when mask bit 0 is clear, and is getmant(10.) == 1.25 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61142
    // Zero-masked scalar getmant: low lane is zeroed when mask bit 0 is clear,
    // and is getmant(10.) == 1.25 when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }
61162
    // Scalar double getmant: low lane is the mantissa of 10. normalized to
    // [1, 2) (10 = 1.25 * 2^3, so 1.25); high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r =
            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61174
    // Write-masked scalar double getmant: low lane comes from `src` (here `a`,
    // so 20.) when mask bit 0 is clear, and is getmant(10.) == 1.25 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61194
    // Zero-masked scalar double getmant: low lane is zeroed when mask bit 0 is
    // clear, and is getmant(10.) == 1.25 when set; high lane copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
61214
    // Scalar roundscale with IMM8 == 0 (round to integral): low lane is
    // 1.1 rounded to 1.0; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61223
    // Write-masked scalar roundscale: low lane comes from `src` (here `a`, so
    // 2.2) when mask bit 0 is clear, and is roundscale(1.1) == 1.0 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61235
    // Zero-masked scalar roundscale: low lane is zeroed when mask bit 0 is
    // clear, and is roundscale(1.1) == 1.0 when set; upper lanes from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }
61247
    // Scalar double roundscale with IMM8 == 0: low lane is 1.1 rounded to 1.0;
    // high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61256
    // Write-masked scalar double roundscale: low lane comes from `src` (here
    // `a`, so 2.2) when mask bit 0 is clear, and is 1.0 when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61268
    // Zero-masked scalar double roundscale: low lane is zeroed when mask bit 0
    // is clear, and is roundscale(1.1) == 1.0 when set; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
61280
    // Scalar scalef: low lane is 1. * 2^3 == 8.; upper lanes copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61289
    // Write-masked scalar scalef: low lane comes from `src` (here `a`, so 1.)
    // when mask bit 0 is clear, and is 1. * 2^3 == 8. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61305
    // Zero-masked scalar scalef: low lane is zeroed when mask bit 0 is clear,
    // and is 1. * 2^3 == 8. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r =
            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }
61320
    // Scalar double scalef: low lane is 1. * 2^3 == 8.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61329
    // Write-masked scalar double scalef: low lane comes from `src` (here `a`,
    // so 1.) when mask bit 0 is clear, and is 1. * 2^3 == 8. when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61345
    // Zero-masked scalar double scalef: low lane is zeroed when mask bit 0 is
    // clear, and is 1. * 2^3 == 8. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r =
            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }
61360
    // Scalar fused multiply-add: low lane is 1.*2. + 3. == 5.; upper lanes are
    // copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61370
    // Write-masked scalar FMA: when mask bit 0 is clear the result equals `a`;
    // when set the low lane is 1.*2. + 3. == 5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61386
    // Zero-masked scalar FMA: low lane is zeroed when mask bit 0 is clear, and
    // is 1.*2. + 3. == 5. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }
61403
    // mask3 scalar FMA (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is 1.*2. + 3. == 5. and upper lanes are
    // taken from `c` (3.), not `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }
61419
    // Scalar double FMA: low lane is 1.*2. + 3. == 5.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61429
    // Write-masked scalar double FMA: result equals `a` when mask bit 0 is
    // clear; when set the low lane is 1.*2. + 3. == 5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61445
    // Zero-masked scalar double FMA: low lane is zeroed when mask bit 0 is
    // clear, and is 1.*2. + 3. == 5. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }
61462
    // mask3 scalar double FMA (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is 5. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }
61478
    // Scalar fused multiply-subtract: low lane is 1.*2. - 3. == -1.; upper
    // lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61488
    // Write-masked scalar FMS: result equals `a` when mask bit 0 is clear;
    // when set the low lane is 1.*2. - 3. == -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61504
    // Zero-masked scalar FMS: low lane is zeroed when mask bit 0 is clear, and
    // is 1.*2. - 3. == -1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }
61521
    // mask3 scalar FMS (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is -1. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }
61537
    // Scalar double FMS: low lane is 1.*2. - 3. == -1.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61547
    // Write-masked scalar double FMS: result equals `a` when mask bit 0 is
    // clear; when set the low lane is 1.*2. - 3. == -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61563
    // Zero-masked scalar double FMS: low lane is zeroed when mask bit 0 is
    // clear, and is 1.*2. - 3. == -1. when set; high lane is copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }
61580
    // mask3 scalar double FMS (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is -1. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }
61596
    // Scalar fused negated multiply-add: low lane is -(1.*2.) + 3. == 1.;
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61606
    // Write-masked scalar FNMA: result equals `a` when mask bit 0 is clear;
    // when set the low lane is -(1.*2.) + 3. == 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61622
    // Zero-masked scalar FNMA: low lane is zeroed when mask bit 0 is clear,
    // and is -(1.*2.) + 3. == 1. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }
61639
    // mask3 scalar FNMA (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is 1. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }
61655
    // Scalar double FNMA: low lane is -(1.*2.) + 3. == 1.; high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61665
    // Write-masked scalar double FNMA: result equals `a` when mask bit 0 is
    // clear; when set the low lane is -(1.*2.) + 3. == 1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61681
    // Zero-masked scalar double FNMA: low lane is zeroed when mask bit 0 is
    // clear, and is -(1.*2.) + 3. == 1. when set; high lane copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }
61698
    // mask3 scalar double FNMA (c is src/dst): result equals `c` when mask bit
    // 0 is clear; when set the low lane is 1. and the high lane is `c`'s (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }
61714
    // Scalar fused negated multiply-subtract: low lane is -(1.*2.) - 3. == -5.;
    // upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61724
    // Write-masked scalar FNMS: result equals `a` when mask bit 0 is clear;
    // when set the low lane is -(1.*2.) - 3. == -5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61740
    // Zero-masked scalar FNMS: low lane is zeroed when mask bit 0 is clear,
    // and is -(1.*2.) - 3. == -5. when set; upper lanes are copied from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }
61757
    // mask3 scalar FNMS (c is src/dst): result equals `c` when mask bit 0 is
    // clear; when set the low lane is -5. and upper lanes come from `c` (3.).
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }
61773
61774 #[simd_test(enable = "avx512f")]
61775 fn test_mm_fnmsub_round_sd() {
61776 let a = _mm_set1_pd(1.);
61777 let b = _mm_set1_pd(2.);
61778 let c = _mm_set1_pd(3.);
61779 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
61780 let e = _mm_set_pd(1., -5.);
61781 assert_eq_m128d(r, e);
61782 }
61783
    // Writemask merge (f64): k = 0 keeps `a`; low bit set computes -(1*2) - 3 = -5.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    // Zeromask (f64): k = 0 zeroes the low lane; high lane always from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    // mask3 variant (f64): passthrough operand is `c`, so k = 0 yields `c`
    // and the high lane of the result comes from `c`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }
61832
    // vfixupimm with imm8 = 5: the NaN in the low lane of `a` is classified and,
    // with the table word from `c` (i32::MAX), mapped to -0.0 — the value the
    // assertions below pin down. Upper lanes pass through from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ss::<5>(a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Writemask variant with the mask bit set behaves like the unmasked form.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Zeromask variant: k = 0 zeroes the low lane; k set reproduces -0.0.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // f64 counterpart of test_mm_fixupimm_ss.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_sd::<5>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 writemask variant with the mask bit set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 zeromask variant: low lane is zeroed when k = 0, fixed up when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }
61898
    // Same fixupimm checks as above, via the explicit-rounding (SAE) entry points
    // with _MM_FROUND_CUR_DIRECTION. Note `a` here has 1. in the top lane to
    // also confirm upper lanes pass through unchanged.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_ss() {
        let a = _mm_set_ps(1., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_ps(1., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Writemask rounding variant with all mask bits set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // Zeromask rounding variant: zeroed when k = 0, fixed up (-0.0) when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    // f64 rounding variant of the fixupimm check.
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 writemask rounding variant with all mask bits set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    // f64 zeromask rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }
61964
    // Masked f32→f64 low-lane convert: k = 0 keeps the passthrough (`a` doubles);
    // k set converts b's low f32 (-1.5) into the low f64 lane, high lane from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Zeromask f32→f64: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Masked f64→f32 low-lane convert: b's low f64 (-7.5) lands in the low f32
    // lane when k is set; the three upper f32 lanes come from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Zeromask f64→f32: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }
62010
    // Explicit-rounding versions of the scalar converts. -1.5 and -7.5 are
    // exactly representable in both widths, so the rounding mode cannot change
    // the result here; the tests check lane placement and masking.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Writemask: k = 0 returns the passthrough `a`; k set converts the low lane.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // Zeromask: low lane zeroed when k = 0, converted when set.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    // f64→f32 with round-toward-zero + suppress-all-exceptions.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Writemask f64→f32 rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    // Zeromask f64→f32 rounding variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }
62078
    // Scalar float→int conversions. With TO_ZERO, -1.5 truncates to -1; with the
    // default rounding (cvtss_i32 / cvtsd_i32), -1.5 rounds to nearest-even = -2.
    // The unsigned variants assert u32::MAX for a negative source (the
    // out-of-range result these tests expect from the hardware conversion).
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Alias of the si32 form; same truncating expectation.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 → u32 conversion: expected result is u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Current-direction rounding (default nearest-even): -1.5 → -2.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    // Negative f32 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 counterpart: truncating -1.5 gives -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Alias of the si32 form for f64.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Default rounding (nearest-even): -1.5 → -2.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    // Negative f64 → u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }
62158
    // Integer → scalar-float conversions: 9 is exactly representable, so the
    // rounding mode is irrelevant; the tests check low-lane placement with the
    // upper lanes passed through from `a`.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // si32 alias of the same conversion.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // Unsigned source variant.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // `const fn` test: also exercised in const evaluation by the harness.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // f64 destination: low lane becomes 9.0, high lane from `a`.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }
62203
    // Truncating (cvtt*) conversions always round toward zero, so -1.5 → -1
    // in every signed case below; unsigned variants expect u32::MAX for a
    // negative source, as asserted.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // i32 alias of the si32 form.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // Non-round-controlled truncation: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f32 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 truncation with SAE: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // i32 alias of the si32 form for f64.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    // f64 truncation without round control: -1.5 → -1.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    // Negative f64 truncated to u32: expected u32::MAX.
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }
62283
    // Unsigned 32-bit → scalar float: 9 converts exactly into the low lane,
    // upper lanes passed through from `a`. Declared `const fn` so the harness
    // can also evaluate it at compile time.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    // f64 destination variant of the unsigned conversion.
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }
62301
    // Ordered compare with imm8 predicate 0 (EQ): 2.2 == 1.1 is false, so the
    // intrinsic returns 0.
    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    // Same EQ comparison for the f64 low lanes.
    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }
62319
    // Low-element extraction from 512-bit vectors; `setr` places its first
    // argument in element 0, which is what each extractor must return.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    // Lowest f32 lane extraction.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    // Lowest f64 lane extraction.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }
62341
    // vshufpd with control 0b11_11_11_11: within each 128-bit pair, select the
    // high element of `a` then the high element of `b`, giving (4,3,8,7,...).
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    // Writemask: k = 0 keeps `a`; all bits set reproduces the shuffle.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    // Zeromask: lower half of the mask keeps shuffled lanes, upper half zeroes.
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }
62372
    // vpexpandd: consecutive memory elements are expanded into the lanes whose
    // mask bit is set (lowest set bit gets a[0], next set bit a[1], ...);
    // unset lanes take `src` (mask variant) or zero (maskz variant).
    // Mask bits beyond the vector's lane count are ignored by the intrinsic.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    // Zeroing form of the 512-bit i32 expand-load.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit merge form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit merge form; only the low 4 mask bits are meaningful.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = unsafe { _mm_mask_expandloadu_epi32(src, m, black_box(p)) };
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = unsafe { _mm_maskz_expandloadu_epi32(m, black_box(p)) };
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // vpexpandq (i64 lanes), 512-bit merge form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    // i64 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // i64 256-bit merge form (low 4 mask bits used).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    // i64 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // i64 128-bit merge form: low 2 mask bits are both clear here, so the
    // whole result is the `src` passthrough and nothing is loaded.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_epi64(src, m, black_box(p)) };
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    // i64 128-bit zeroing form: all relevant mask bits clear → all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_epi64(m, black_box(p)) };
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }
62498
    // vexpandps: same expand-load semantics as the integer forms above, for f32
    // lanes — consecutive memory values fill the masked lanes in ascending order,
    // unset lanes take `src` (mask) or zero (maskz).
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    // f32 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm512_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    // f32 256-bit merge form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    // f32 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    // f32 128-bit merge form: only mask bit 3 (of the low four) is set,
    // so a[0] lands in the top lane and the rest keep `src`.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_ps(src, m, black_box(p)) };
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    // f32 128-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_ps(m, black_box(p)) };
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    // vexpandpd (f64 lanes), 512-bit merge form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    // f64 512-bit zeroing form.
    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm512_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    // f64 256-bit merge form (low 4 mask bits used).
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    // f64 256-bit zeroing form.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm256_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    // f64 128-bit merge form: low 2 mask bits are clear, so no loads happen
    // and the passthrough `src` is returned unchanged.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_mask_expandloadu_pd(src, m, black_box(p)) };
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    // f64 128-bit zeroing form: all relevant mask bits clear → all zeros.
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = unsafe { _mm_maskz_expandloadu_pd(m, black_box(p)) };
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
62632}
62633